Spaces:

deeme
/

pod

Running

App Files Files Community

deeme committed on Dec 4, 2024

Commit

2cc3649

verified ·

1 Parent(s): 625933e

Upload app.py

Browse files

Files changed (1) hide show

app.py +158 -43

app.py CHANGED Viewed

@@ -5,6 +5,9 @@ import logging
 from podcastfy.client import generate_podcast
 from dotenv import load_dotenv
 # Configure logging
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
@@ -48,7 +51,18 @@ VOICE_OPTIONS = [
     {"id": "shimmer", "name": "shimmer"},
 ]
 def get_api_key(key_name, ui_value):
     return ui_value if ui_value else os.getenv(key_name)
 def process_inputs(
@@ -60,7 +74,8 @@ def process_inputs(
     openai_key,
     openai_base_url,  # 新增参数
     elevenlabs_key,
-    word_count,
     conversation_style,
     roles_person1,
     roles_person2,
@@ -75,6 +90,11 @@ def process_inputs(
     tts_openai_question,
     tts_openai_answer,
     ending_message,
 ):
     try:
         logger.info("Starting podcast generation process")
@@ -82,14 +102,20 @@ def process_inputs(
         # API key handling
         logger.debug("Setting API keys")
         os.environ["GEMINI_API_KEY"] = get_api_key("GEMINI_API_KEY", gemini_key)
         if tts_model == "openai":
-            logger.debug("Setting OpenAI API key")
-            if not openai_key and not os.getenv("OPENAI_API_KEY"):
-                raise ValueError("OpenAI API key is required when using OpenAI TTS model")
-            os.environ["OPENAI_API_KEY"] = get_api_key("OPENAI_API_KEY", openai_key)
-            if openai_base_url:
-                os.environ["OPENAI_API_BASE"] = openai_base_url
         if tts_model == "elevenlabs":
             logger.debug("Setting ElevenLabs API key")
@@ -151,7 +177,8 @@ def process_inputs(
         # Prepare conversation config
         logger.debug("Preparing conversation config")
         conversation_config = {
-            "word_count": word_count,
             "conversation_style": conversation_style.split(','),
             "roles_person1": roles_person1,
             "roles_person2": roles_person2,
@@ -186,6 +213,10 @@ def process_inputs(
             image_paths=image_paths if image_paths else None,
             tts_model=tts_model,
             conversation_config=conversation_config,
         )
         logger.info("Podcast generation completed")
@@ -254,7 +285,7 @@ with gr.Blocks(
                 label="Gemini API Key",
                 type="password",
                 value="",
-                info="必须的"
             )
             openai_key = gr.Textbox(
                 label="OpenAI API Key",
@@ -332,13 +363,45 @@ with gr.Blocks(
                 </h3>
                 """,
             )
-            word_count = gr.Slider(
-                minimum=500,
-                maximum=5000,
-                value=2000,
-                step=100,
-                label="字数统计",
-                info="目标字数（用于生成内容）学术辩论:3000。讲故事:1000"
             )
             conversation_style = gr.Textbox(
@@ -414,12 +477,6 @@ with gr.Blocks(
                 info="播客使用的语言"
             )
-#            longform = gr.Checkbox(
-#                label="长篇模式",
-#                value=False,
-#                info="启用长篇内容生成模式"
-#            )
             # Voice Settings
             gr.Markdown(
                 """
@@ -434,22 +491,35 @@ with gr.Blocks(
                 info="结束语"
             )
             tts_model = gr.Radio(
-                choices=["openai", "elevenlabs", "edge"],
                 value="openai",
                 label="文本转语音模型",
                 info="选择语音合成模型 (edge 免费但音质较差, 其他模型音质更好但需申请 API keys)"
             )
-            tts_openai_question = gr.Dropdown(
-                choices={voice["name"]: voice["id"] for voice in VOICE_OPTIONS},
-                value=VOICE_OPTIONS[27]["id"],  # 默认选择选项
-                label="OpenAI TTS 主持人",
-                info="选择OpenAI TTS 主持人角色语音"
-            )
-            tts_openai_answer = gr.Dropdown(
-                choices={voice["name"]: voice["id"] for voice in VOICE_OPTIONS},
-                value=VOICE_OPTIONS[31]["id"],  # 默认选择选项
-                label="OpenAI TTS 嘉宾",
-                info="选择OpenAI TTS 嘉宾角色语音"
             )
             # Advanced Settings
@@ -469,16 +539,60 @@ with gr.Blocks(
                 info="一些额外的指令，用来帮助AI更好地理解你想要聊天的内容和方向"
             )
-#            api_key_label = gr.Textbox(
-#                label="自定义基于云的 LLM",
 #                value="GEMINI_API_KEY",
-#                info="可选，默认使用 Gemini，如使用 OPENAI，上面填入 'OPENAI_API_KEY' 并保证设置好环境变量且设置好下面的模型"
 #            )
-#            llm_model_name = gr.Textbox(
-#                label="设置好对应自定义基于云的 LLM 模型",
-#                value="gemini-1.5-pro-latest",
-#                info="可选，配合上面的参数，默认是 Gemini 的 gemini-1.5-pro-latest，默认 OPENAI 可支持模型 api.168369.xyz/v1/models 获取"
 #            )
     # Output Section
@@ -504,12 +618,13 @@ with gr.Blocks(
             text_input, urls_input, pdf_files, image_files,
             gemini_key, openai_key, openai_base_url,
             elevenlabs_key,
-            word_count, conversation_style,
             roles_person1, roles_person2,
             dialogue_structure, podcast_name,
             podcast_tagline, output_language, tts_model,
             creativity_level, user_instructions,
             engagement_techniques, tts_openai_question, tts_openai_answer, ending_message,
         ],
         outputs=audio_output
     )

 from podcastfy.client import generate_podcast
 from dotenv import load_dotenv
+import requests
+import json
 # Configure logging
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
     {"id": "shimmer", "name": "shimmer"},
 ]
+# Round-robin rotation over several Gemini API keys
+def get_next_gemini_key(api_keys):
+    """Return the next Gemini API key from a comma-separated list.
+
+    Keys are handed out in round-robin order. The position is stored on the
+    function object (``get_next_gemini_key.current_index``) so it persists
+    between calls.
+
+    Args:
+        api_keys: Comma-separated keys; surrounding whitespace and empty
+            entries are ignored.
+
+    Returns:
+        The key at the current rotation position.
+
+    Raises:
+        ValueError: If ``api_keys`` contains no non-empty key.
+    """
+    keys = [k.strip() for k in api_keys.split(',') if k.strip()]
+    if not keys:
+        # Input such as "," previously surfaced as a bare IndexError.
+        raise ValueError("No Gemini API key found in the supplied list")
+    # The saved position may come from an earlier, longer list; reduce it
+    # modulo the current length so it can never index past the end.
+    index = getattr(get_next_gemini_key, 'current_index', 0) % len(keys)
+    get_next_gemini_key.current_index = (index + 1) % len(keys)
+    return keys[index]
 def get_api_key(key_name, ui_value):
+    """Resolve an API key, preferring the UI value over the environment.
+
+    When ``key_name`` is ``"GEMINI_API_KEY"`` and ``ui_value`` contains a
+    comma, the value is passed to ``get_next_gemini_key`` and its result is
+    returned. Any other non-empty ``ui_value`` is returned unchanged. When
+    ``ui_value`` is empty the result is ``os.getenv(key_name)``, which is
+    ``None`` if that variable is not set.
+    """
+    if key_name == "GEMINI_API_KEY" and ui_value and ',' in ui_value:
+        return get_next_gemini_key(ui_value)
     return ui_value if ui_value else os.getenv(key_name)
 def process_inputs(
     openai_key,
     openai_base_url,  # 新增参数
     elevenlabs_key,
+    max_num_chunks,
+    min_chunk_size,
     conversation_style,
     roles_person1,
     roles_person2,
     tts_openai_question,
     tts_openai_answer,
     ending_message,
+    longform,
+    llm_model_name,
+    #api_key_label,
+    #gemini_model,
+    #openai_model,
 ):
     try:
         logger.info("Starting podcast generation process")
         # API key handling
         logger.debug("Setting API keys")
         os.environ["GEMINI_API_KEY"] = get_api_key("GEMINI_API_KEY", gemini_key)
+        logger.debug("Setting OpenAI API key")
+        if not openai_key and not os.getenv("OPENAI_API_KEY"):
+            raise ValueError("OpenAI API key is required when using OpenAI TTS model")
+        os.environ["OPENAI_API_KEY"] = get_api_key("OPENAI_API_KEY", openai_key)
+#        if api_key_label == "OPENAI_API_KEY":
+        os.environ["OPENAI_API_BASE"] = get_api_key("OPENAI_BASE_URL", openai_base_url)
         if tts_model == "openai":
+            os.environ["OPENAI_BASE_URL"] = get_api_key("OPENAI_BASE_URL", openai_base_url)
+            # Look up the voice ID that matches the selected voice name
+            tts_openai_question = next(voice["id"] for voice in VOICE_OPTIONS if voice["name"] == tts_openai_question)
+            tts_openai_answer = next(voice["id"] for voice in VOICE_OPTIONS if voice["name"] == tts_openai_answer)
         if tts_model == "elevenlabs":
             logger.debug("Setting ElevenLabs API key")
         # Prepare conversation config
         logger.debug("Preparing conversation config")
         conversation_config = {
+            "max_num_chunks": max_num_chunks,
+            "min_chunk_size": min_chunk_size,
             "conversation_style": conversation_style.split(','),
             "roles_person1": roles_person1,
             "roles_person2": roles_person2,
             image_paths=image_paths if image_paths else None,
             tts_model=tts_model,
             conversation_config=conversation_config,
+            longform=longform,
+            llm_model_name=llm_model_name,
+            api_key_label="OPENAI_API_KEY",
+            #llm_model_name=get_active_model(api_key_label, gemini_model, openai_model),
         )
         logger.info("Podcast generation completed")
                 label="Gemini API Key",
                 type="password",
                 value="",
+                info="必须的，多个key请用逗号分隔"
             )
             openai_key = gr.Textbox(
                 label="OpenAI API Key",
                 </h3>
                 """,
             )
+            llm_model_name = gr.Radio(
+                choices=["gemini-1.5-pro-latest", "gemini-exp-1121", "learnlm-1.5-pro-experimental", "o1-mini", "o1-preview", "gpt-4o-mini", "gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-4-turbo-2024-04-09", "claude-3-5-sonnet-20240620", "claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022"],
+                value="gemini-1.5-pro-latest",
+                label="文本生成模型",
+                info="默认使用 gemini-1.5-pro-latest "
+            )
+            longform = gr.Checkbox(
+                label="长篇模式",
+                value=False,
+                info="启用长篇内容生成模式，启用长篇需要Google Cloud支持，设置好GOOGLE_API_KEY"
+            )
+            with gr.Group(visible=False) as longform_settings_group:
+                max_num_chunks = gr.Slider(
+                    minimum=1,
+                    maximum=20,
+                    value=8,
+                    step=1,
+                    label="最大轮数",
+                    info="长篇模式下，生成的最大轮数"
+                )
+                min_chunk_size = gr.Slider(
+                    minimum=300,
+                    maximum=2000,
+                    value=600,
+                    step=100,
+                    label="一轮最小字符数",
+                    info="长篇模式下，生成一轮所需的最小字符数"
+                )
+            # Callback that updates visibility (used by the long-form checkbox listener)
+            def update_longform_settings(is_longform):
+                """Return a ``gr.update`` whose ``visible`` flag equals ``is_longform``."""
+                return gr.update(visible=is_longform)
+            # Register the change-event listener
+            longform.change(
+                fn=update_longform_settings,
+                inputs=[longform],
+                outputs=[longform_settings_group]
             )
             conversation_style = gr.Textbox(
                 info="播客使用的语言"
             )
             # Voice Settings
             gr.Markdown(
                 """
                 info="结束语"
             )
             tts_model = gr.Radio(
+                choices=["openai", "geminimulti", "elevenlabs", "gemini", "edge"],
                 value="openai",
                 label="文本转语音模型",
                 info="选择语音合成模型 (edge 免费但音质较差, 其他模型音质更好但需申请 API keys)"
             )
+            with gr.Group(visible=True) as openai_voice_group:
+                tts_openai_question = gr.Dropdown(
+                    choices=[voice["name"] for voice in VOICE_OPTIONS],
+                    value=VOICE_OPTIONS[27]["name"],
+                    label="OpenAI TTS 主持人",
+                    info="选择OpenAI TTS 主持人角色语音"
+                )
+                tts_openai_answer = gr.Dropdown(
+                    choices=[voice["name"] for voice in VOICE_OPTIONS],
+                    value=VOICE_OPTIONS[31]["name"],
+                    label="OpenAI TTS 嘉宾",
+                    info="选择OpenAI TTS 嘉宾角色语音"
+                )
+            # Callback that updates visibility (used by the TTS model listener)
+            def update_voice_options(tts_model):
+                """Return a ``gr.update`` that is visible only when ``tts_model`` is ``"openai"``."""
+                return gr.update(visible=(tts_model == "openai"))
+            # Register the change-event listener
+            tts_model.change(
+                fn=update_voice_options,
+                inputs=[tts_model],
+                outputs=[openai_voice_group]
             )
             # Advanced Settings
                 info="一些额外的指令，用来帮助AI更好地理解你想要聊天的内容和方向"
             )
+#            api_key_label = gr.Radio(
+#                choices=["GEMINI_API_KEY", "OPENAI_API_KEY"],
 #                value="GEMINI_API_KEY",
+#                label="文本生成模型供应商",
+#                info="默认使用 Gemini "
 #            )
+#            with gr.Group(visible=True) as gemini_llm_group:
+#                gemini_model = gr.Radio(
+#                    choices=["gemini-1.5-pro-latest", "gemini-exp-1121", "learnlm-1.5-pro-experimental"],
+#                    value="gemini-1.5-pro-latest",
+#                    label="Gemini 文本生成模型",
+#                    info="默认使用 gemini-1.5-pro-latest "
+#                )
+#            def fetch_openai_models():
+#                try:
+#                    response = requests.get("https://api.168369.xyz/v1/models")
+#                    data = response.json()
+                    # 提取所有模型的 id
+#                    model_ids = [model["id"] for model in data["data"]]
+#                    return model_ids
+#                except Exception as e:
+#                    print(f"获取模型列表失败: {str(e)}")
+#                    return ["获取模型列表失败"]
+#            with gr.Group(visible=False) as openai_llm_group:
+#                openai_model = gr.Radio(
+                    #choices=fetch_openai_models(),  # 从 API 获取模型列表
+#                    choices=["o1-mini", "o1-preview", "gpt-4o-mini", "gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-4-turbo-2024-04-09"],
+#                    value="gpt-4o-mini",
+#                    label="Openai 文本生成模型",
+#                    info="默认为 gpt-4o-mini"
+#                )
+            # 添加获取当前有效模型的函数
+#            def get_active_model(api_key_label, gemini_model, openai_model):
+#                if api_key_label == "GEMINI_API_KEY":
+#                    return gemini_model
+#                else:  # OPENAI_API_KEY
+#                    return openai_model
+            # 添加更新可见性的函数
+#            def update_llm_options(api_key_label):
+#                if api_key_label == "GEMINI_API_KEY":
+#                    return gr.update(visible=True), gr.update(visible=False)
+#                else:  # OPENAI_API_KEY
+#                    return gr.update(visible=False), gr.update(visible=True)
+            # 添加事件监听
+#            api_key_label.change(
+#                fn=update_llm_options,
+#                inputs=[api_key_label],
+#                outputs=[gemini_llm_group, openai_llm_group]
 #            )
     # Output Section
             text_input, urls_input, pdf_files, image_files,
             gemini_key, openai_key, openai_base_url,
             elevenlabs_key,
+            max_num_chunks, min_chunk_size, conversation_style,
             roles_person1, roles_person2,
             dialogue_structure, podcast_name,
             podcast_tagline, output_language, tts_model,
             creativity_level, user_instructions,
             engagement_techniques, tts_openai_question, tts_openai_answer, ending_message,
+            longform, llm_model_name, #api_key_label, gemini_model, openai_model,
         ],
         outputs=audio_output
     )