Hieucyber2208 committed on
Commit
eb133a3
·
verified ·
1 Parent(s): 8f9dd80

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -46
app.py CHANGED
@@ -1,50 +1,116 @@
1
- import torch
2
- from transformers import VitsModel, AutoTokenizer
3
- import torchaudio
 
 
 
 
 
 
4
  import os
5
- from gtts import gTTS
6
 
7
def generate_audio(text, filename="output.mp3", gender="female", speed="normal"):
    """Convert Vietnamese text to speech and save it as an audio file.

    Parameters:
        text (str): The text to convert.
        filename (str): The output file name.
        gender (str): "female" synthesises with gTTS; "male" synthesises with
            the ``facebook/mms-tts-vie`` VITS checkpoint. Case-insensitive.
        speed (str): "slow", "normal", or "fast". Only used for the gTTS
            voice, which distinguishes only slow from not-slow, so "fast"
            behaves like "normal".

    Any other ``gender`` prints a warning and writes no file.
    """
    lang = "vi"
    voice = gender.lower()

    if voice == "female":
        # gTTS only offers a female voice; its single speed switch is `slow`.
        slow = speed.lower() == "slow"

        tts = gTTS(text=text, lang=lang, slow=slow)
        tts.save(filename)
        print(f"✅ Audio saved as {filename}")

    elif voice == "male":
        # MMS-TTS provides the male voice.
        model = VitsModel.from_pretrained("facebook/mms-tts-vie")
        tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-vie")

        inputs = tokenizer(text, return_tensors="pt")
        with torch.no_grad():
            output = model(**inputs).waveform

        # Save at the rate the checkpoint declares instead of a hard-coded
        # 24000 Hz; a mismatched rate changes playback speed and pitch.
        torchaudio.save(filename, output, model.config.sampling_rate, backend="sox_io")
        print(f"✅ Audio saved as {filename}")

    else:
        print("⚠️ Giọng không hợp lệ! Chỉ hỗ trợ 'male' hoặc 'female'.")
43
def text_to_speech(gender, speed):
    """Generate one MP3 for each text file in the current directory.

    Every ``*.txt`` file in ``./`` except ``text.txt`` and
    ``requirements.txt`` is read as UTF-8 and passed to ``generate_audio``.
    Files are handled in sorted name order, and each audio file takes the
    text file's name with the extension swapped to ``.mp3``.

    Parameters:
        gender (str): Forwarded to ``generate_audio``.
        speed (str): Forwarded to ``generate_audio``.
    """
    text_folder = "./"
    skipped = {"text.txt", "requirements.txt"}
    text_files = sorted(
        name
        for name in os.listdir(text_folder)
        if name.endswith(".txt") and name not in skipped
    )
    for text_file in text_files:
        with open(text_file, "r", encoding="utf-8") as file:
            content = file.read()
        # Swap only the extension: str.replace("txt", "mp3") would also
        # rewrite any "txt" occurring earlier in the file name.
        audio_file = os.path.splitext(text_file)[0] + ".mp3"
        generate_audio(content, audio_file, gender=gender, speed=speed)
 
1
+ import asyncio
2
+
3
try:
    asyncio.get_running_loop()
except RuntimeError:
    # No event loop is running in this thread, so run a no-op coroutine.
    # The stated intent is to have an event loop created before Streamlit
    # is imported.
    # NOTE(review): asyncio.run() closes the loop it creates before it
    # returns, so this may not leave a usable loop behind — confirm that
    # this workaround actually has the intended effect.
    asyncio.run(asyncio.sleep(0))
7
+
8
+ import streamlit as st
9
+ from main import main
10
  import os
11
+ import subprocess
12
 
13
# Output video paths: the file the pipeline is expected to produce and the
# copy whose audio has been re-encoded.
OUTPUT_VIDEO_PATH = "final_output.mp4"
OUTPUT_VIDEO_FIXED_PATH = "final_output_fixed.mp4"

# App title
st.set_page_config(page_title="KnowFlow", page_icon="📖")
st.markdown("<h1 style='text-align: center;'>📖 KnowFlow 🌊</h1>", unsafe_allow_html=True)
st.markdown("<h4 style='text-align: center;'>Convert documents into videos with AI-powered storytelling</h4>", unsafe_allow_html=True)

# Author information
st.markdown("---")
st.markdown("👨‍💻 **Author:** Nguyễn Trung Hiếu")
st.markdown("🔗 [GitHub Repository](https://github.com/hieunguyen-cyber/KnowFlow.git)")
st.markdown("---")
st.markdown("""
## 🎯 Purpose
KnowFlow automates the process of converting lecture documents (PDF, DOCX) into narrated videos with structured explanations. It extracts text, formulas, and images, generates explanations, converts text to speech, and assembles everything into a video.

## 🛠️ How to Use
1️⃣ **Upload a lecture file (PDF, DOCX)**.
2️⃣ **Select processing options** (text extraction, summarization, TTS).
3️⃣ **Generate the video** – the system will process and compile it.
4️⃣ **Download the final video** for review or sharing.

🚀 Fully open-source and free to use! \n
If you find it's slow, then another person must be using the GPU. Please wait!!
""")

# Document upload. The label names both accepted types so it agrees with
# the `type` filter.
uploaded_file = st.file_uploader("📂 Upload your document (PDF, DOCX)", type=["pdf", "docx"])

# If a file was uploaded, save it to the working directory and keep its path.
file_path = None
if uploaded_file:
    # The name is supplied by the client; keep only its final component so
    # the write cannot land outside the working directory.
    file_path = os.path.basename(uploaded_file.name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())  # persist the uploaded bytes

number_of_images = st.slider("🖼️ Nhập số ảnh", 1, 10, 3)

# Input configuration
gender = st.radio("🗣️ Select Voice Gender", options=["female", "male"])

# The male voice offers only "normal" speed, so the selector is disabled.
if gender == "male":
    speed = st.radio("⚡ Speech Speed (Male voice supports only normal)", options=["normal"], disabled=True)
else:
    speed = st.radio("⚡ Speech Speed", options=["fast", "normal", "slow"])
analysis_level = st.radio("Analysis Level", options=["basic", "detailed"])
writting_style = st.radio("Writting Style", options=["academic", "popular", "creative", "humorous"])

# Range slider from 50 to 250 in steps of 50.
word_lower_limit, word_upper_limit = st.slider(
    "Chọn khoảng độ dài văn bản:",
    min_value=50,
    max_value=250,
    value=(50, 250),  # default selection
    step=50,
)

st.write(f"Giới hạn độ dài văn bản từ **{word_lower_limit}** đến **{word_upper_limit}** ký tự.")
detail_level = st.radio("📖 Detail Level of Image Description", options=["short", "detailed"])
perspective = st.radio("🔎 Perspective", options=["subjective", "neutral"])
emotion = st.text_input("🎭 Emotion", placeholder="Example: mysterious, romantic,...")
time_setting = st.text_input("⏳ Time Setting", placeholder="Example: modern, medieval,...")
art_style = st.text_input("🖌️ Image Description Style", placeholder="Example: realistic, abstract,...")
style = st.text_input("🎨 Image Style", placeholder="Example: realistic, anime,...")
color_palette = st.text_input("🌈 Color Palette", placeholder="Example: vibrant, monochrome,...")
78
+
79
def convert_audio_format(video_input, video_output):
    """Re-encode a video's audio track to AAC while copying the video stream.

    Runs ``ffmpeg -i video_input -c:v copy -c:a aac -b:a 192k -y video_output``.

    Parameters:
        video_input (str): Path of the existing video file.
        video_output (str): Destination path; ffmpeg is told to overwrite it.

    Returns:
        bool: True when ffmpeg exits successfully, False when it exits with
        an error (its stderr is printed).

    Raises:
        FileNotFoundError: If ``video_input`` does not exist.
    """
    if not os.path.exists(video_input):
        raise FileNotFoundError(f"File '{video_input}' không tồn tại!")

    command = [
        "ffmpeg", "-i", video_input,
        "-c:v", "copy", "-c:a", "aac", "-b:a", "192k",
        "-y",  # overwrite the output file if it already exists
        video_output,
    ]

    try:
        # stdout is never used, so discard it instead of buffering it.
        subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        # Decode leniently so that undecodable bytes in ffmpeg's output
        # cannot raise a second error while reporting the first.
        details = e.stderr.decode(errors="replace")
        print(f"❌ Lỗi khi chuyển đổi video: {details}")
        return False

    print(f"✅ Chuyển đổi thành công: {video_output}")
    return True
96
+
97
# Run the pipeline when the button is pressed.
if st.button("🚀 Generate Video"):
    if file_path and os.path.exists(file_path):
        st.success("⏳ Processing started...")

        # Remove outputs left by an earlier run so that a failed run cannot
        # be reported, or offered for download, as a fresh success.
        for stale_path in (OUTPUT_VIDEO_PATH, OUTPUT_VIDEO_FIXED_PATH):
            try:
                os.remove(stale_path)
            except FileNotFoundError:
                pass

        main(file_path, analysis_level, writting_style, word_lower_limit, word_upper_limit, gender, speed, number_of_images, detail_level, perspective, emotion, time_setting, art_style, style, color_palette)

        # Check whether the pipeline produced the video.
        if os.path.exists(OUTPUT_VIDEO_PATH):
            st.success("🎉 Video generated successfully!")

            # Re-encode the audio track.
            convert_audio_format(OUTPUT_VIDEO_PATH, OUTPUT_VIDEO_FIXED_PATH)

            # A failed conversion is only printed by the converter, so make
            # sure the converted file exists before opening it.
            if os.path.exists(OUTPUT_VIDEO_FIXED_PATH):
                # Offer the converted video for download.
                with open(OUTPUT_VIDEO_FIXED_PATH, "rb") as video_file:
                    st.download_button(label="📥 Download Video", data=video_file, file_name="final_output_fixed.mp4", mime="video/mp4")
            else:
                st.error("⚠️ Audio conversion failed. Please check the logs.")
        else:
            st.error("⚠️ Video generation failed. Please check the logs.")
    else:
        st.error("⚠️ Please upload a valid PDF or DOCX file.")