Spaces:

Hieucyber2208
/

know-flow

Sleeping

App Files Files Community

Hieucyber2208 committed on Mar 2

Commit

eb133a3

verified ·

1 Parent(s): 8f9dd80

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -46

app.py CHANGED Viewed

@@ -1,50 +1,116 @@
-import torch
-from transformers import VitsModel, AutoTokenizer
-import torchaudio
 import os
-from gtts import gTTS
-def generate_audio(text, filename="output.mp3", gender="female", speed="normal"):
-    """
-    Convert text to speech and save it as an audio file.
-    Parameters:
-        text (str): The text to convert.
-        filename (str): The output file name.
-        gender (str): "male" (use MMS-TTS) or "female" (use gTTS).
-        speed (str): "slow", "normal", or "fast" (only for gTTS).
-    """
-    lang = "vi"
-    if gender.lower() == "female":
-        # gTTS chỉ có giọng nữ
-        speed_mapping = {"slow": True, "normal": False, "fast": False}
-        slow = speed_mapping.get(speed.lower(), False)
-        tts = gTTS(text=text, lang=lang, slow=slow)
-        tts.save(filename)
-        print(f"✅ Audio saved as {filename}")
-    elif gender.lower() == "male":
-        # MMS-TTS cho giọng nam
-        model = VitsModel.from_pretrained("facebook/mms-tts-vie")
-        tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-vie")
-        inputs = tokenizer(text, return_tensors="pt")
-        with torch.no_grad():
-            output = model(**inputs).waveform
-        # Lưu file âm thanh
-        torchaudio.save(filename, output, 24000, backend="sox_io")
-        print(f"✅ Audio saved as {filename}")
     else:
-        print("⚠️ Giọng không hợp lệ! Chỉ hỗ trợ 'male' hoặc 'female'.")
-def text_to_speech(gender, speed):
-    text_folder = "./"
-    text_files = sorted([f for f in os.listdir(text_folder) if f.endswith('.txt') and f != "text.txt" and f != "requirements.txt"])
-    for text_file in text_files:
-        with open(f"{text_file}", "r", encoding="utf-8") as file:
-            content = file.read()
-        audio_file = text_file.replace("txt","mp3")
-        generate_audio(content, f"{audio_file}", gender=gender, speed=speed)

+import asyncio
+try:
+    asyncio.get_running_loop()
+except RuntimeError:
+    asyncio.run(asyncio.sleep(0))  # Ensures an event loop is created before Streamlit starts
+import streamlit as st
+from main import main
 import os
+import subprocess
+# Định nghĩa đường dẫn video đầu ra
+OUTPUT_VIDEO_PATH = "final_output.mp4"
+OUTPUT_VIDEO_FIXED_PATH = "final_output_fixed.mp4"
+# Tiêu đề ứng dụng
+st.set_page_config(page_title="KnowFlow", page_icon="📖")
+st.markdown("<h1 style='text-align: center;'>📖 KnowFlow 🌊</h1>", unsafe_allow_html=True)
+st.markdown("<h4 style='text-align: center;'>Convert documents into videos with AI-powered storytelling</h4>", unsafe_allow_html=True)
+# Thông tin tác giả
+st.markdown("---")
+st.markdown("👨‍💻 **Author:** Nguyễn Trung Hiếu")
+st.markdown("🔗 [GitHub Repository](https://github.com/hieunguyen-cyber/KnowFlow.git)")
+st.markdown("---")
+st.markdown("""
+## 🎯 Purpose
+KnowFlow automates the process of converting lecture documents (PDF, DOCX) into narrated videos with structured explanations. It extracts text, formulas, and images, generates explanations, converts text to speech, and assembles everything into a video.
+## 🛠️ How to Use
+1️⃣ **Upload a lecture file (PDF, DOCX)**.
+2️⃣ **Select processing options** (text extraction, summarization, TTS).
+3️⃣ **Generate the video** – the system will process and compile it.
+4️⃣ **Download the final video** for review or sharing.
+🚀 Fully open-source and free to use! \n
+If you find it's slow, then another person must be using the GPU. Please wait!!
+""")
+# Upload a document (PDF or DOCX)
+uploaded_file = st.file_uploader("📂 Upload your document (PDF)", type=["pdf","docx"])
+# If a file was uploaded, write it to a relative path (the uploaded file's own name) and keep that path
+file_path = None
+if uploaded_file:
+    file_path = f"{uploaded_file.name}"
+    with open(file_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())  # Lưu file thực tế
+number_of_images = st.slider("🖼️ Nhập số ảnh",1,10,3)
+# Cấu hình đầu vào
+gender = st.radio("🗣️ Select Voice Gender", options=["female", "male"])
+# Nếu chọn giọng nam, vô hiệu hóa tốc độ (chỉ cho phép "normal")
+if gender == "male":
+    speed = st.radio("⚡ Speech Speed (Male voice supports only normal)", options=["normal"], disabled=True)
+else:
+    speed = st.radio("⚡ Speech Speed", options=["fast", "normal", "slow"])
+analysis_level = st.radio("Analysis Level", options=["basic", "detailed"])
+writting_style = st.radio("Writting Style", options  = ["academic","popular","creative","humorous"])
+# Tạo thanh trượt với giá trị từ 50 đến 250, bước nhảy 50
+word_lower_limit, word_upper_limit = st.slider(
+    "Chọn khoảng độ dài văn bản:",
+    min_value=50,
+    max_value=250,
+    value=(50, 250),  # Giá trị mặc định
+    step=50
+)
+st.write(f"Giới hạn độ dài văn bản từ **{word_lower_limit}** đến **{word_upper_limit}** ký tự.")
+detail_level = st.radio("📖 Detail Level of Image Description", options=["short", "detailed"])
+perspective = st.radio("🔎 Perspective", options=["subjective", "neutral"])
+emotion = st.text_input("🎭 Emotion", placeholder="Example: mysterious, romantic,...")
+time_setting = st.text_input("⏳ Time Setting", placeholder="Example: modern, medieval,...")
+art_style = st.text_input("🖌️ Image Description Style", placeholder="Example: realistic, abstract,...")
+style = st.text_input("🎨 Image Style", placeholder="Example: realistic, anime,...")
+color_palette = st.text_input("🌈 Color Palette", placeholder="Example: vibrant, monochrome,...")
+def convert_audio_format(video_input, video_output):
+    """Chuyển đổi định dạng âm thanh của video sang AAC."""
+    if not os.path.exists(video_input):
+        raise FileNotFoundError(f"File '{video_input}' không tồn tại!")
+    command = [
+        "ffmpeg", "-i", video_input,
+        "-c:v", "copy", "-c:a", "aac", "-b:a", "192k",
+        "-y",  # Ghi đè nếu file output đã tồn tại
+        video_output
+    ]
+    try:
+        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        print(f"✅ Chuyển đổi thành công: {video_output}")
+    except subprocess.CalledProcessError as e:
+        print(f"❌ Lỗi khi chuyển đổi video: {e.stderr.decode()}")
+# Nút chạy pipeline
+if st.button("🚀 Generate Video"):
+    if file_path and os.path.exists(file_path):
+        st.success("⏳ Processing started...")
+        main(file_path, analysis_level, writting_style, word_lower_limit, word_upper_limit, gender, speed, number_of_images, detail_level, perspective, emotion, time_setting, art_style, style, color_palette)
+        # Kiểm tra xem video đã được tạo chưa
+        if os.path.exists(OUTPUT_VIDEO_PATH):
+            st.success("🎉 Video generated successfully!")
+            # Chuyển đổi định dạng âm thanh
+            convert_audio_format(OUTPUT_VIDEO_PATH, OUTPUT_VIDEO_FIXED_PATH)
+            # Tạo link tải về
+            with open(OUTPUT_VIDEO_FIXED_PATH, "rb") as video_file:
+                st.download_button(label="📥 Download Video", data=video_file, file_name="final_output_fixed.mp4", mime="video/mp4")
+        else:
+            st.error("⚠️ Video generation failed. Please check the logs.")
     else:
+        st.error("⚠️ Please upload a valid PDF file.")