Spaces:

Jiangxz01
/

Generated_Podcast_Audio

Running

App Files Files

Jiangxz01 committed on Sep 30, 2024

Commit

216e871

verified ·

1 Parent(s): eda5fa8

Upload 2 files

Browse files

Files changed (2) hide show

app.py +61 -17
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -10,6 +10,30 @@ import json
 import os
 import re
 import time
 def create_client(api_key=None):
     if api_key:
@@ -84,7 +108,7 @@ Follow this JSON example structure, MUST be in {language} language:
 <podcast_dialogue>
 根據你在頭腦風暴階段提出的關鍵點和創造性想法，撰寫一段引人入勝且訊息豐富的播客對話。採用對話式的語氣，並包括任何必要的上下文或解釋，使內容對一般聽眾容易理解。使用主持人名字 {speaker1_name} 和嘉賓名字 {speaker2_name}，為聽眾營造更吸引人和身臨其境的聆聽體驗。不要包括像[主持人]或[嘉賓]這樣的括號預留位置。設計你的輸出內容必須適合直接朗讀，因為它將直接轉換為音訊。
 確保對話儘可能詳細且完整，同時保持在主題之內並維持吸引人的流暢性。目標是使用你的全部輸出容量，建立儘可能長的播客節目，同時以娛樂性的方式傳達輸入文字中的關鍵訊息。
-在對話結束時，讓主持人和嘉賓自然總結他們討論中的主要見解和要點，這應當是對話的隨機部分，以自然隨意而非明顯的總結 - 目的是在結束前最後一次以自然流暢的方式強化核心思想。最終以感謝詞結束。
 </podcast_dialogue>
 """
     client = create_client(api_key)
@@ -187,9 +211,15 @@ async def tts_generate(input_text, speaker1, speaker2):
     gr.Info(f"已成功生成 Podcast 音檔，執行時間： {(end_time - start_time):.2f} 秒。")
     return output_file
-async def process_podcast(input_text, language, speaker1, speaker2, api_key):
     gr.Info("開始生成 Podcast 節目及音檔，請稍待片刻......")
     start_time = time.time()
     podcast_script = generate_response(input_text, language, speaker1, speaker2, api_key)
     speaker1_name = speaker1.split(' - ')[0]
     speaker2_name = speaker2.split(' - ')[0]
@@ -209,7 +239,7 @@ async def process_podcast(input_text, language, speaker1, speaker2, api_key):
     audio_file = await tts_generate(podcast_script, speaker1, speaker2)
     end_time = time.time()
     gr.Info(f"已成功完成 Podcast 節目及音檔，總執行時間： {(end_time - start_time):.2f} 秒。")
-    gr.Info("請待本訊息自動消失後即可播放或下載 Podcast 音檔！！")
     return podcast_text, audio_file
 custom_css = """
@@ -254,34 +284,41 @@ body {
     border-radius: 10px !important;
     margin: 0 !important;
 }
 .lng-background {
     background-color: #FFF5CD !important;
-    padding: 5px !important;
     border-radius: 10px !important;
     margin: 0 !important;
 }
 .sk1-background {
     background-color: #FFF5CD !important;
-    padding: 5px !important;
     border-radius: 10px !important;
     margin: 0 !important;
 }
 .sk2-background {
     background-color: #FFF5CD !important;
-    padding: 5px !important;
     border-radius: 10px !important;
     margin: 0 !important;
 }
 .clear-button {
     color: black !important;
     background-color: #FFCFB3 !important;
-    padding: 5px !important;
     border-radius: 10px !important;
     margin: 0 !important;
 }
 .api-background {
     background-color: #FFCFB3 !important;
-    padding: 5px !important;
     border-radius: 10px !important;
     margin: 0 !important;
 }
@@ -305,19 +342,26 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
     > ### **※ 玩轉聲音魅力，開拓更多可能性，自動生成 Podcast 節目及音檔，系統布署：江信宗，LLM：Llama-3.1-405B-Instruct。**
     """, elem_classes="center-aligned")
-    input_text = gr.Textbox(
-        label="請輸入 Podcast 話題（建議50至1000字）",
-        placeholder="輸入 Podcast 話題內容，受限 LLM Context Length，建議1000字以內 ......",
-        elem_classes="input-background",
-        max_lines=20
-    )
     def check_input_length(text):
         """Warn the user when the topic text is too short or too long.

         Shows a Gradio warning when the text has 1-3 characters or more
         than 4096 characters; otherwise does nothing and returns None.
         """
         # The textbox value may be None (e.g. after being cleared), and
         # len(None) would raise TypeError; treat a missing value as empty.
         length = len(text) if text else 0
         if 0 < length < 4:
             return gr.Warning("輸入內容過短，請提供明確的話題內容。")
         elif length > 4096:
             return gr.Warning("輸入內容已超過 max tokens，請縮短話題內容。")
         return None
     input_text.change(fn=check_input_length, inputs=[input_text])
     with gr.Row():
@@ -369,7 +413,7 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
         )
         clear_input_text_button = gr.Button("清除Podcast話題", scale=1, elem_classes="clear-button")
-        clear_input_text_button.click(fn=lambda: "", inputs=None, outputs=input_text)
     with gr.Row():
         generate_button = gr.Button("生成 Podcast 節目及音檔", scale=2, elem_classes="gen-button")
@@ -377,7 +421,7 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
     audio_output = gr.Audio(label="Generated Podcast Audio", elem_classes="audio-background")
     podcast_script = gr.Textbox(label="Generated Podcast 文稿", elem_classes="script-background")
-    generate_button.click(fn=process_podcast, inputs=[input_text, Language, Speaker_1, Speaker_2, api_key], outputs=[podcast_script, audio_output])
 if __name__ == "__main__":

 import os
 import re
 import time
+import aiofiles
+import pypdf
+import io
+class TextExtractor:
+    @staticmethod
+    async def extract_from_pdf(file_path: str) -> str:
+        async with aiofiles.open(file_path, 'rb') as file:
+            content = await file.read()
+            pdf_reader = pypdf.PdfReader(io.BytesIO(content))
+            return "\n\n".join(page.extract_text() for page in pdf_reader.pages if page.extract_text())
+    @staticmethod
+    async def extract_from_txt(file_path: str) -> str:
+        async with aiofiles.open(file_path, 'r') as file:
+            return await file.read()
+    @classmethod
+    async def extract_text(cls, file_path: str) -> str:
+        _, file_extension = os.path.splitext(file_path)
+        if file_extension.lower() == '.pdf':
+            return await cls.extract_from_pdf(file_path)
+        elif file_extension.lower() == '.txt':
+            return await cls.extract_from_txt(file_path)
+        else:
+            raise gr.Error(f"Unsupported file type: {file_extension}")
 def create_client(api_key=None):
     if api_key:
 <podcast_dialogue>
 根據你在頭腦風暴階段提出的關鍵點和創造性想法，撰寫一段引人入勝且訊息豐富的播客對話。採用對話式的語氣，並包括任何必要的上下文或解釋，使內容對一般聽眾容易理解。使用主持人名字 {speaker1_name} 和嘉賓名字 {speaker2_name}，為聽眾營造更吸引人和身臨其境的聆聽體驗。不要包括像[主持人]或[嘉賓]這樣的括號預留位置。設計你的輸出內容必須適合直接朗讀，因為它將直接轉換為音訊。
 確保對話儘可能詳細且完整，同時保持在主題之內並維持吸引人的流暢性。目標是使用你的全部輸出容量，建立儘可能長的播客節目，同時以娛樂性的方式傳達輸入文字中的關鍵訊息。
+在對話結束時，讓主持人和嘉賓自然總結他們討論中的主要見解和要點，這應當是對話的隨機部分，以自然隨意而非明顯刻意的總結 - 目的是在結束前最後一次以自然流暢的方式強化核心思想。最終以感謝詞結束。
 </podcast_dialogue>
 """
     client = create_client(api_key)
     gr.Info(f"已成功生成 Podcast 音檔，執行時間： {(end_time - start_time):.2f} 秒。")
     return output_file
+async def process_podcast(input_text, input_file, language, speaker1, speaker2, api_key):
     gr.Info("開始生成 Podcast 節目及音檔，請稍待片刻......")
     start_time = time.time()
+    input_text = input_text.strip()
+    if input_file:
+        input_text = await TextExtractor.extract_text(input_file.name)
+        if not input_text.strip():
+            gr.Warning("PDF檔案不得為掃描圖片檔，請您確認正確輸入文字或上傳PDF文字檔。")
+            return None, None
     podcast_script = generate_response(input_text, language, speaker1, speaker2, api_key)
     speaker1_name = speaker1.split(' - ')[0]
     speaker2_name = speaker2.split(' - ')[0]
     audio_file = await tts_generate(podcast_script, speaker1, speaker2)
     end_time = time.time()
     gr.Info(f"已成功完成 Podcast 節目及音檔，總執行時間： {(end_time - start_time):.2f} 秒。")
+    gr.Info("請等待本訊息自動消失後即可播放或下載 Podcast 音檔！！")
     return podcast_text, audio_file
 custom_css = """
     border-radius: 10px !important;
     margin: 0 !important;
 }
+.file-background {
+    background-color: #B7E0FF !important;
+    padding: 15px !important;
+    border-radius: 10px !important;
+    margin: 0 !important;
+    height: 135px !important;
+}
 .lng-background {
     background-color: #FFF5CD !important;
+    padding: 10px !important;
     border-radius: 10px !important;
     margin: 0 !important;
 }
 .sk1-background {
     background-color: #FFF5CD !important;
+    padding: 10px !important;
     border-radius: 10px !important;
     margin: 0 !important;
 }
 .sk2-background {
     background-color: #FFF5CD !important;
+    padding: 10px !important;
     border-radius: 10px !important;
     margin: 0 !important;
 }
 .clear-button {
     color: black !important;
     background-color: #FFCFB3 !important;
+    padding: 10px !important;
     border-radius: 10px !important;
     margin: 0 !important;
 }
 .api-background {
     background-color: #FFCFB3 !important;
+    padding: 15px !important;
     border-radius: 10px !important;
     margin: 0 !important;
 }
     > ### **※ 玩轉聲音魅力，開拓更多可能性，自動生成 Podcast 節目及音檔，系統布署：江信宗，LLM：Llama-3.1-405B-Instruct。**
     """, elem_classes="center-aligned")
+    with gr.Row():
+        input_text = gr.Textbox(
+            label="請輸入 Podcast 話題（建議50至1000字）",
+            placeholder="輸入 Podcast 話題內容，受限 LLM Context Length，建議1000字以內 ......",
+            elem_classes="input-background",
+            max_lines=20,
+            scale=3
+        )
+        fileName = gr.File(
+            file_types=[".pdf", ".txt"],
+            label="或上傳 PDF 檔",
+            scale=1,
+            elem_classes="file-background"
+        )
     def check_input_length(text):
         """Warn the user when the topic text is too short or too long.

         Shows a Gradio warning when the text has 1-3 characters or more
         than 4096 characters; otherwise does nothing and returns None.
         """
         # The textbox value may be None (e.g. after being cleared), and
         # len(None) would raise TypeError; treat a missing value as empty.
         length = len(text) if text else 0
         if 0 < length < 4:
             return gr.Warning("輸入內容過短，請提供明確的話題內容。")
         elif length > 4096:
             return gr.Warning("輸入內容已超過 max tokens，請縮短話題內容。")
         return None
     input_text.change(fn=check_input_length, inputs=[input_text])
     with gr.Row():
         )
         clear_input_text_button = gr.Button("清除Podcast話題", scale=1, elem_classes="clear-button")
+        clear_input_text_button.click(fn=lambda: (None, None), inputs=None, outputs=[input_text, fileName])
     with gr.Row():
         generate_button = gr.Button("生成 Podcast 節目及音檔", scale=2, elem_classes="gen-button")
     audio_output = gr.Audio(label="Generated Podcast Audio", elem_classes="audio-background")
     podcast_script = gr.Textbox(label="Generated Podcast 文稿", elem_classes="script-background")
+    generate_button.click(fn=process_podcast, inputs=[input_text, fileName, Language, Speaker_1, Speaker_2, api_key], outputs=[podcast_script, audio_output])
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -1,4 +1,6 @@
 gradio
 openai
 pydub
-edge-tts

 gradio
 openai
 pydub
+edge-tts
+aiofiles
+pypdf