Spaces:

Jiangxz01
/

Generated_Podcast_Audio

Running

App Files Files

Jiangxz01 committed on Sep 27, 2024

Commit

1a2d948

verified ·

1 Parent(s): 3e08982

Upload app.py

Browse files

Files changed (1) hide show

app.py +59 -40

app.py CHANGED Viewed

@@ -7,10 +7,7 @@ import os
 import re
 from pydub import AudioSegment
 import uuid
-import io
 import edge_tts
-import asyncio
-import aiofiles
 import json
 def create_client(api_key=None):
@@ -100,37 +97,46 @@ Follow this JSON example structure, MUST be in {language} language:
         podcast_match = re.search(r'{.*}', response.choices[0].message.content, re.DOTALL)
         if podcast_match:
             podcast_json = podcast_match.group(0)
-            # 嘗試解析 JSON，如果失敗則進行清理
             try:
                 json.loads(podcast_json)
             except json.JSONDecodeError:
-                # 清理 JSON 字符串
-                podcast_json = re.sub(r',\s*}', '}', podcast_json)  # 移除最後一個逗號
-                podcast_json = re.sub(r',\s*]', ']', podcast_json)  # 移除數組最後一個逗號
             return podcast_json
         else:
-            raise gr.Error("Failed to generate podcast script. Please try again.")
     except Exception as e:
         if "API key not valid" in str(e):
-            raise gr.Error("Invalid API key. Please provide a valid API key.")
         elif "rate limit" in str(e).lower():
-            raise gr.Error("Rate limit exceeded for the API key. Please try again later or provide your own API key.")
         else:
-            raise gr.Error(f"Failed to generate podcast script: {e}")
 async def tts_generate(input_text, speaker1, speaker2):
     voice_names = {
-        "YunJhe - 中文 (Taiwan)": "zh-TW-YunJheNeural",
-        "HsiaoChen - 中文 (Taiwan)": "zh-TW-HsiaoChenNeural",
-        "HsiaoYu - 中文 (Taiwan)": "zh-TW-HsiaoYuNeural",
-        "Andrew - English (United States)": "en-US-AndrewMultilingualNeural",
-        "Ava - English (United States)": "en-US-AvaMultilingualNeural",
-        "Brian - English (United States)": "en-US-BrianMultilingualNeural",
-        "Emma - English (United States)": "en-US-EmmaMultilingualNeural",
-        "Florian - German (Germany)": "de-DE-FlorianMultilingualNeural",
-        "Seraphina - German (Germany)": "de-DE-SeraphinaMultilingualNeural",
-        "Remy - French (France)": "fr-FR-RemyMultilingualNeural",
-        "Vivienne - French (France)": "fr-FR-VivienneMultilingualNeural"
     }
     speaker1_voice = voice_names[speaker1]
@@ -166,14 +172,14 @@ async def tts_generate(input_text, speaker1, speaker2):
         audio = AudioSegment.from_mp3(audio_file)
         combined += audio
-        os.remove(audio_file)  # 刪除臨時文件
         podcast_json["podcast"].append({
             "speaker": speaker_map.get(speaker, speaker),
             "line": text
         })
-    output_file = f"combined_{uuid.uuid4()}.mp3"
     combined.export(output_file, format="mp3")
     return output_file
@@ -185,7 +191,7 @@ async def process_podcast(input_text, language, speaker1, speaker2, api_key):
 with gr.Blocks() as iface:
     gr.Markdown("# 🎙️ Generated Podcast Audio. Deployed by 江信宗")
-    input_text = gr.Textbox(label="請輸入 Podcast 話題（建議50~500字之間）")
     with gr.Row():
         Language = gr.Dropdown(
@@ -197,39 +203,52 @@ with gr.Blocks() as iface:
         )
         speaker_choices = [
-            "YunJhe - 中文 (Taiwan)",
-            "HsiaoChen - 中文 (Taiwan)",
-            "HsiaoYu - 中文 (Taiwan)",
-            "Andrew - English (United States)",
-            "Ava - English (United States)",
-            "Brian - English (United States)",
-            "Emma - English (United States)",
-            "Florian - German (Germany)",
-            "Seraphina - German (Germany)",
-            "Remy - French (France)",
-            "Vivienne - French (France)"
         ]
         Speaker_1 = gr.Dropdown(
             choices=speaker_choices,
-            value="YunJhe - 中文 (Taiwan)",
             label="主持人的語音",
             interactive=True,
             scale=2
         )
         Speaker_2 = gr.Dropdown(
             choices=speaker_choices,
-            value="HsiaoChen - 中文 (Taiwan)",
             label="來賓的語音",
             interactive=True,
             scale=2
         )
-    api_key = gr.Textbox(label="API Key", type="password", placeholder="API authentication key for large language models")
     podcast_script = gr.Textbox(label="生成的結果")
     audio_output = gr.Audio(label="生成的音頻")
-    generate_button = gr.Button("生成")
     generate_button.click(fn=process_podcast, inputs=[input_text, Language, Speaker_1, Speaker_2, api_key], outputs=[podcast_script, audio_output])
 if __name__ == "__main__":

 import re
 from pydub import AudioSegment
 import uuid
 import edge_tts
 import json
 def create_client(api_key=None):
         podcast_match = re.search(r'{.*}', response.choices[0].message.content, re.DOTALL)
         if podcast_match:
             podcast_json = podcast_match.group(0)
             try:
                 json.loads(podcast_json)
             except json.JSONDecodeError:
+                podcast_json = re.sub(r',\s*}', '}', podcast_json)
+                podcast_json = re.sub(r',\s*]', ']', podcast_json)
             return podcast_json
         else:
+            raise gr.Error("生成 Podcast 劇本失敗！！請稍後再試。")
     except Exception as e:
         if "API key not valid" in str(e):
+            raise gr.Error("無效的 API 金鑰！！請提供有效的 API 金鑰。")
         elif "rate limit" in str(e).lower():
+            raise gr.Error("API 金鑰使用額度已超過限制！！請稍後再試或使用其他 API 金鑰。")
         else:
+            raise gr.Error(f"生成 Podcast 劇本失敗！！請稍後再試。")
 async def tts_generate(input_text, speaker1, speaker2):
     voice_names = {
+        "YunJhe - 臺灣國語 (Male)": "zh-TW-YunJheNeural",
+        "HsiaoChen - 臺灣國語 (Female)": "zh-TW-HsiaoChenNeural",
+        "HsiaoYu - 臺灣國語 (Female)": "zh-TW-HsiaoYuNeural",
+        "HiuGaai - 中文 (Female)": "zh-HK-HiuGaaiNeural",
+        "HiuMaan - 中文 (Female)": "zh-HK-HiuMaanNeural",
+        "WanLung - 中文 (Female)": "zh-HK-WanLungNeural",
+        "Xiaoxiao - 中文 (Female)": "zh-CN-XiaoxiaoNeural",
+        "Xiaoyi - 中文 (Female)": "zh-CN-XiaoyiNeural",
+        "Yunjian - 中文 (Male)": "zh-CN-YunjianNeural",
+        "Yunxi - 中文 (Male)": "zh-CN-YunxiNeural",
+        "Yunxia - 中文 (Male)": "zh-CN-YunxiaNeural",
+        "Yunyang - 中文 (Male)": "zh-CN-YunyangNeural",
+        "Xiaobei - 中文 (Female)": "zh-CN-liaoning-XiaobeiNeural",
+        "Xiaoni - 中文 (Female)": "zh-CN-shaanxi-XiaoniNeural",
+        "Andrew - English (Male)": "en-US-AndrewMultilingualNeural",
+        "Ava - English (Female)": "en-US-AvaMultilingualNeural",
+        "Brian - English (Male)": "en-US-BrianMultilingualNeural",
+        "Emma - English (Female)": "en-US-EmmaMultilingualNeural",
+        "Florian - German (Male)": "de-DE-FlorianMultilingualNeural",
+        "Seraphina - German (Female)": "de-DE-SeraphinaMultilingualNeural",
+        "Remy - French (Male)": "fr-FR-RemyMultilingualNeural",
+        "Vivienne - French (Female)": "fr-FR-VivienneMultilingualNeural"
     }
     speaker1_voice = voice_names[speaker1]
         audio = AudioSegment.from_mp3(audio_file)
         combined += audio
+        os.remove(audio_file)
         podcast_json["podcast"].append({
             "speaker": speaker_map.get(speaker, speaker),
             "line": text
         })
+    output_file = f"Jiangxz_{uuid.uuid4()}.mp3"
     combined.export(output_file, format="mp3")
     return output_file
 with gr.Blocks() as iface:
     gr.Markdown("# 🎙️ Generated Podcast Audio. Deployed by 江信宗")
+    input_text = gr.Textbox(label="請輸入 Podcast 話題（建議50~500字之間）", placeholder="輸入 Podcast 話題簡易內容......")
     with gr.Row():
         Language = gr.Dropdown(
         )
         speaker_choices = [
+            "YunJhe - 臺灣國語 (Male)",
+            "HsiaoChen - 臺灣國語 (Female)",
+            "HsiaoYu - 臺灣國語 (Female)",
+            "HiuGaai - 中文 (Female)",
+            "HiuMaan - 中文 (Female)",
+            "WanLung - 中文 (Female)",
+            "Xiaoxiao - 中文 (Female)",
+            "Xiaoyi - 中文 (Female)",
+            "Yunjian - 中文 (Male)",
+            "Yunxi - 中文 (Male)",
+            "Yunxia - 中文 (Male)",
+            "Yunyang - 中文 (Male)",
+            "Xiaobei - 中文 (Female)",
+            "Xiaoni - 中文 (Female)",
+            "Andrew - English (Male)",
+            "Ava - English (Female)",
+            "Brian - English (Male)",
+            "Emma - English (Female)",
+            "Florian - German (Male)",
+            "Seraphina - German (Female)",
+            "Remy - French (Male)",
+            "Vivienne - French (Female)"
         ]
         Speaker_1 = gr.Dropdown(
             choices=speaker_choices,
+            value="YunJhe - 臺灣國語 (Male)",
             label="主持人的語音",
             interactive=True,
             scale=2
         )
         Speaker_2 = gr.Dropdown(
             choices=speaker_choices,
+            value="HsiaoChen - 臺灣國語 (Female)",
             label="來賓的語音",
             interactive=True,
             scale=2
         )
+    with gr.Row():
+        generate_button = gr.Button("生成", scale=2)
+        api_key = gr.Textbox(label="API Key", type="password", placeholder="API authentication key for large language models", scale=1)
     podcast_script = gr.Textbox(label="生成的結果")
     audio_output = gr.Audio(label="生成的音頻")
     generate_button.click(fn=process_podcast, inputs=[input_text, Language, Speaker_1, Speaker_2, api_key], outputs=[podcast_script, audio_output])
 if __name__ == "__main__":