Upload app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,9 @@ import logging
|
|
5 |
from podcastfy.client import generate_podcast
|
6 |
from dotenv import load_dotenv
|
7 |
|
|
|
|
|
|
|
8 |
# Configure logging
|
9 |
logging.basicConfig(level=logging.DEBUG)
|
10 |
logger = logging.getLogger(__name__)
|
@@ -48,7 +51,18 @@ VOICE_OPTIONS = [
|
|
48 |
{"id": "shimmer", "name": "shimmer"},
|
49 |
]
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
def get_api_key(key_name, ui_value):
    """Resolve an API key, preferring the value typed into the UI.

    Args:
        key_name: Name of the environment variable used as the fallback.
        ui_value: Value entered in the UI; may be ``None`` or empty.

    Returns:
        The UI value with surrounding whitespace removed when it is
        non-blank, otherwise ``os.getenv(key_name)`` (``None`` when that
        variable is unset).
    """
    # A whitespace-only entry is truthy but is not a usable key, so strip
    # before testing and fall back to the environment in that case.
    value = ui_value.strip() if isinstance(ui_value, str) else ui_value
    return value if value else os.getenv(key_name)
|
53 |
|
54 |
def process_inputs(
|
@@ -60,7 +74,8 @@ def process_inputs(
|
|
60 |
openai_key,
|
61 |
openai_base_url, # 新增参数
|
62 |
elevenlabs_key,
|
63 |
-
|
|
|
64 |
conversation_style,
|
65 |
roles_person1,
|
66 |
roles_person2,
|
@@ -75,6 +90,11 @@ def process_inputs(
|
|
75 |
tts_openai_question,
|
76 |
tts_openai_answer,
|
77 |
ending_message,
|
|
|
|
|
|
|
|
|
|
|
78 |
):
|
79 |
try:
|
80 |
logger.info("Starting podcast generation process")
|
@@ -82,14 +102,20 @@ def process_inputs(
|
|
82 |
# API key handling
|
83 |
logger.debug("Setting API keys")
|
84 |
os.environ["GEMINI_API_KEY"] = get_api_key("GEMINI_API_KEY", gemini_key)
|
85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
if tts_model == "openai":
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
if openai_base_url:
|
92 |
-
os.environ["OPENAI_API_BASE"] = openai_base_url
|
93 |
|
94 |
if tts_model == "elevenlabs":
|
95 |
logger.debug("Setting ElevenLabs API key")
|
@@ -151,7 +177,8 @@ def process_inputs(
|
|
151 |
# Prepare conversation config
|
152 |
logger.debug("Preparing conversation config")
|
153 |
conversation_config = {
|
154 |
-
"
|
|
|
155 |
"conversation_style": conversation_style.split(','),
|
156 |
"roles_person1": roles_person1,
|
157 |
"roles_person2": roles_person2,
|
@@ -186,6 +213,10 @@ def process_inputs(
|
|
186 |
image_paths=image_paths if image_paths else None,
|
187 |
tts_model=tts_model,
|
188 |
conversation_config=conversation_config,
|
|
|
|
|
|
|
|
|
189 |
)
|
190 |
|
191 |
logger.info("Podcast generation completed")
|
@@ -254,7 +285,7 @@ with gr.Blocks(
|
|
254 |
label="Gemini API Key",
|
255 |
type="password",
|
256 |
value="",
|
257 |
-
info="
|
258 |
)
|
259 |
openai_key = gr.Textbox(
|
260 |
label="OpenAI API Key",
|
@@ -332,13 +363,45 @@ with gr.Blocks(
|
|
332 |
</h3>
|
333 |
""",
|
334 |
)
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
342 |
)
|
343 |
|
344 |
conversation_style = gr.Textbox(
|
@@ -414,12 +477,6 @@ with gr.Blocks(
|
|
414 |
info="播客使用的语言"
|
415 |
)
|
416 |
|
417 |
-
# longform = gr.Checkbox(
|
418 |
-
# label="长篇模式",
|
419 |
-
# value=False,
|
420 |
-
# info="启用长篇内容生成模式"
|
421 |
-
# )
|
422 |
-
|
423 |
# Voice Settings
|
424 |
gr.Markdown(
|
425 |
"""
|
@@ -434,22 +491,35 @@ with gr.Blocks(
|
|
434 |
info="结束语"
|
435 |
)
|
436 |
tts_model = gr.Radio(
|
437 |
-
choices=["openai", "elevenlabs", "edge"],
|
438 |
value="openai",
|
439 |
label="文本转语音模型",
|
440 |
info="选择语音合成模型 (edge 免费但音质较差, 其他模型音质更好但需申请 API keys)"
|
441 |
)
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
453 |
)
|
454 |
|
455 |
# Advanced Settings
|
@@ -469,16 +539,60 @@ with gr.Blocks(
|
|
469 |
info="一些额外的指令,用来帮助AI更好地理解你想要聊天的内容和方向"
|
470 |
)
|
471 |
|
472 |
-
# api_key_label = gr.
|
473 |
-
#
|
474 |
# value="GEMINI_API_KEY",
|
475 |
-
#
|
|
|
476 |
# )
|
477 |
|
478 |
-
#
|
479 |
-
#
|
480 |
-
#
|
481 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
482 |
# )
|
483 |
|
484 |
# Output Section
|
@@ -504,12 +618,13 @@ with gr.Blocks(
|
|
504 |
text_input, urls_input, pdf_files, image_files,
|
505 |
gemini_key, openai_key, openai_base_url,
|
506 |
elevenlabs_key,
|
507 |
-
|
508 |
roles_person1, roles_person2,
|
509 |
dialogue_structure, podcast_name,
|
510 |
podcast_tagline, output_language, tts_model,
|
511 |
creativity_level, user_instructions,
|
512 |
engagement_techniques, tts_openai_question, tts_openai_answer, ending_message,
|
|
|
513 |
],
|
514 |
outputs=audio_output
|
515 |
)
|
|
|
5 |
from podcastfy.client import generate_podcast
|
6 |
from dotenv import load_dotenv
|
7 |
|
8 |
+
import requests
|
9 |
+
import json
|
10 |
+
|
11 |
# Configure logging
|
12 |
logging.basicConfig(level=logging.DEBUG)
|
13 |
logger = logging.getLogger(__name__)
|
|
|
51 |
{"id": "shimmer", "name": "shimmer"},
|
52 |
]
|
53 |
|
54 |
+
# 添加 API Keys 轮询功能
|
55 |
+
def get_next_gemini_key(api_keys):
    """Return the next key from a comma-separated list, round-robin.

    The rotation position is kept on the function object itself (the
    ``current_index`` attribute), so it persists across calls for the
    lifetime of the process.

    Args:
        api_keys: Comma-separated API keys. Surrounding whitespace and
            empty entries are ignored.

    Returns:
        The key at the current rotation position.

    Raises:
        ValueError: If ``api_keys`` contains no non-blank key.
    """
    keys = [k.strip() for k in api_keys.split(',') if k.strip()]
    if not keys:
        # Input such as "," or " , " would otherwise fail with a bare
        # IndexError on the lookup below.
        raise ValueError("No API keys found in the comma-separated key list")

    # Wrap the stored position: a later call may supply fewer keys than an
    # earlier one, leaving the saved index past the end of the new list.
    index = getattr(get_next_gemini_key, 'current_index', 0) % len(keys)
    get_next_gemini_key.current_index = (index + 1) % len(keys)
    return keys[index]
|
62 |
+
|
63 |
def get_api_key(key_name, ui_value):
    """Resolve an API key, preferring the value typed into the UI.

    When ``key_name`` is ``"GEMINI_API_KEY"`` and the UI value contains a
    comma, the value is treated as a list of keys and one is picked via
    ``get_next_gemini_key``.

    Args:
        key_name: Name of the environment variable used as the fallback.
        ui_value: Value entered in the UI; may be ``None`` or empty.

    Returns:
        The selected or stripped UI value when it is non-blank, otherwise
        ``os.getenv(key_name)`` (``None`` when that variable is unset).
    """
    # A whitespace-only entry is truthy but is not a usable key, so strip
    # before testing and fall back to the environment in that case.
    value = ui_value.strip() if isinstance(ui_value, str) else ui_value
    if key_name == "GEMINI_API_KEY" and value and ',' in value:
        return get_next_gemini_key(value)
    return value if value else os.getenv(key_name)
|
67 |
|
68 |
def process_inputs(
|
|
|
74 |
openai_key,
|
75 |
openai_base_url, # 新增参数
|
76 |
elevenlabs_key,
|
77 |
+
max_num_chunks,
|
78 |
+
min_chunk_size,
|
79 |
conversation_style,
|
80 |
roles_person1,
|
81 |
roles_person2,
|
|
|
90 |
tts_openai_question,
|
91 |
tts_openai_answer,
|
92 |
ending_message,
|
93 |
+
longform,
|
94 |
+
llm_model_name,
|
95 |
+
#api_key_label,
|
96 |
+
#gemini_model,
|
97 |
+
#openai_model,
|
98 |
):
|
99 |
try:
|
100 |
logger.info("Starting podcast generation process")
|
|
|
102 |
# API key handling
|
103 |
logger.debug("Setting API keys")
|
104 |
os.environ["GEMINI_API_KEY"] = get_api_key("GEMINI_API_KEY", gemini_key)
|
105 |
+
|
106 |
+
logger.debug("Setting OpenAI API key")
|
107 |
+
if not openai_key and not os.getenv("OPENAI_API_KEY"):
|
108 |
+
raise ValueError("OpenAI API key is required when using OpenAI TTS model")
|
109 |
+
os.environ["OPENAI_API_KEY"] = get_api_key("OPENAI_API_KEY", openai_key)
|
110 |
+
|
111 |
+
# if api_key_label == "OPENAI_API_KEY":
|
112 |
+
os.environ["OPENAI_API_BASE"] = get_api_key("OPENAI_BASE_URL", openai_base_url)
|
113 |
+
|
114 |
if tts_model == "openai":
|
115 |
+
os.environ["OPENAI_BASE_URL"] = get_api_key("OPENAI_BASE_URL", openai_base_url)
|
116 |
+
# 根据选择的名称找到对应的 voice ID
|
117 |
+
tts_openai_question = next(voice["id"] for voice in VOICE_OPTIONS if voice["name"] == tts_openai_question)
|
118 |
+
tts_openai_answer = next(voice["id"] for voice in VOICE_OPTIONS if voice["name"] == tts_openai_answer)
|
|
|
|
|
119 |
|
120 |
if tts_model == "elevenlabs":
|
121 |
logger.debug("Setting ElevenLabs API key")
|
|
|
177 |
# Prepare conversation config
|
178 |
logger.debug("Preparing conversation config")
|
179 |
conversation_config = {
|
180 |
+
"max_num_chunks": max_num_chunks,
|
181 |
+
"min_chunk_size": min_chunk_size,
|
182 |
"conversation_style": conversation_style.split(','),
|
183 |
"roles_person1": roles_person1,
|
184 |
"roles_person2": roles_person2,
|
|
|
213 |
image_paths=image_paths if image_paths else None,
|
214 |
tts_model=tts_model,
|
215 |
conversation_config=conversation_config,
|
216 |
+
longform=longform,
|
217 |
+
llm_model_name=llm_model_name,
|
218 |
+
api_key_label="OPENAI_API_KEY",
|
219 |
+
#llm_model_name=get_active_model(api_key_label, gemini_model, openai_model),
|
220 |
)
|
221 |
|
222 |
logger.info("Podcast generation completed")
|
|
|
285 |
label="Gemini API Key",
|
286 |
type="password",
|
287 |
value="",
|
288 |
+
info="必须的,多个key请用逗号分隔"
|
289 |
)
|
290 |
openai_key = gr.Textbox(
|
291 |
label="OpenAI API Key",
|
|
|
363 |
</h3>
|
364 |
""",
|
365 |
)
|
366 |
+
llm_model_name = gr.Radio(
|
367 |
+
choices=["gemini-1.5-pro-latest", "gemini-exp-1121", "learnlm-1.5-pro-experimental", "o1-mini", "o1-preview", "gpt-4o-mini", "gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-4-turbo-2024-04-09", "claude-3-5-sonnet-20240620", "claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022"],
|
368 |
+
value="gemini-1.5-pro-latest",
|
369 |
+
label="文本生成模型",
|
370 |
+
info="默认使用 gemini-1.5-pro-latest "
|
371 |
+
)
|
372 |
+
|
373 |
+
longform = gr.Checkbox(
|
374 |
+
label="长篇模式",
|
375 |
+
value=False,
|
376 |
+
info="启用长篇内容生成模式,启用长篇需要Google Cloud支持,设置好GOOGLE_API_KEY"
|
377 |
+
)
|
378 |
+
with gr.Group(visible=False) as longform_settings_group:
|
379 |
+
max_num_chunks = gr.Slider(
|
380 |
+
minimum=1,
|
381 |
+
maximum=20,
|
382 |
+
value=8,
|
383 |
+
step=1,
|
384 |
+
label="最大轮数",
|
385 |
+
info="长篇模式下,生成的最大轮数"
|
386 |
+
)
|
387 |
+
min_chunk_size = gr.Slider(
|
388 |
+
minimum=300,
|
389 |
+
maximum=2000,
|
390 |
+
value=600,
|
391 |
+
step=100,
|
392 |
+
label="一轮最小字符数",
|
393 |
+
info="长篇模式下,生成一轮所需的最小字符数"
|
394 |
+
)
|
395 |
+
|
396 |
+
# 添加更新可见性的函数
|
397 |
+
def update_longform_settings(is_longform):
    """Build the update that shows or hides the long-form settings group.

    The group's visibility is set to the value of ``is_longform``.
    """
    visibility_update = gr.update(visible=is_longform)
    return visibility_update
|
399 |
+
|
400 |
+
# 添加事件监听
|
401 |
+
longform.change(
|
402 |
+
fn=update_longform_settings,
|
403 |
+
inputs=[longform],
|
404 |
+
outputs=[longform_settings_group]
|
405 |
)
|
406 |
|
407 |
conversation_style = gr.Textbox(
|
|
|
477 |
info="播客使用的语言"
|
478 |
)
|
479 |
|
|
|
|
|
|
|
|
|
|
|
|
|
480 |
# Voice Settings
|
481 |
gr.Markdown(
|
482 |
"""
|
|
|
491 |
info="结束语"
|
492 |
)
|
493 |
tts_model = gr.Radio(
|
494 |
+
choices=["openai", "geminimulti", "elevenlabs", "gemini", "edge"],
|
495 |
value="openai",
|
496 |
label="文本转语音模型",
|
497 |
info="选择语音合成模型 (edge 免费但音质较差, 其他模型音质更好但需申请 API keys)"
|
498 |
)
|
499 |
+
with gr.Group(visible=True) as openai_voice_group:
|
500 |
+
tts_openai_question = gr.Dropdown(
|
501 |
+
choices=[voice["name"] for voice in VOICE_OPTIONS],
|
502 |
+
value=VOICE_OPTIONS[27]["name"],
|
503 |
+
label="OpenAI TTS 主持人",
|
504 |
+
info="选择OpenAI TTS 主持人角色语音"
|
505 |
+
)
|
506 |
+
|
507 |
+
tts_openai_answer = gr.Dropdown(
|
508 |
+
choices=[voice["name"] for voice in VOICE_OPTIONS],
|
509 |
+
value=VOICE_OPTIONS[31]["name"],
|
510 |
+
label="OpenAI TTS 嘉宾",
|
511 |
+
info="选择OpenAI TTS 嘉宾角色语音"
|
512 |
+
)
|
513 |
+
|
514 |
+
# 添加更新可见性的函数
|
515 |
+
def update_voice_options(tts_model):
    """Build the update that shows the OpenAI voice group only for "openai".

    Any other ``tts_model`` value hides the group.
    """
    show_openai_voices = tts_model == "openai"
    return gr.update(visible=show_openai_voices)
|
517 |
+
|
518 |
+
# 添加事件监听
|
519 |
+
tts_model.change(
|
520 |
+
fn=update_voice_options,
|
521 |
+
inputs=[tts_model],
|
522 |
+
outputs=[openai_voice_group]
|
523 |
)
|
524 |
|
525 |
# Advanced Settings
|
|
|
539 |
info="一些额外的指令,用来帮助AI更好地理解你想要聊天的内容和方向"
|
540 |
)
|
541 |
|
542 |
+
# api_key_label = gr.Radio(
|
543 |
+
# choices=["GEMINI_API_KEY", "OPENAI_API_KEY"],
|
544 |
# value="GEMINI_API_KEY",
|
545 |
+
# label="文本生成模型供应商",
|
546 |
+
# info="默认使用 Gemini "
|
547 |
# )
|
548 |
|
549 |
+
# with gr.Group(visible=True) as gemini_llm_group:
|
550 |
+
# gemini_model = gr.Radio(
|
551 |
+
# choices=["gemini-1.5-pro-latest", "gemini-exp-1121", "learnlm-1.5-pro-experimental"],
|
552 |
+
# value="gemini-1.5-pro-latest",
|
553 |
+
# label="Gemini 文本生成模型",
|
554 |
+
# info="默认使用 gemini-1.5-pro-latest "
|
555 |
+
# )
|
556 |
+
|
557 |
+
# def fetch_openai_models():
|
558 |
+
# try:
|
559 |
+
# response = requests.get("https://api.168369.xyz/v1/models")
|
560 |
+
# data = response.json()
|
561 |
+
# 提取所有模型的 id
|
562 |
+
# model_ids = [model["id"] for model in data["data"]]
|
563 |
+
# return model_ids
|
564 |
+
# except Exception as e:
|
565 |
+
# print(f"获取模型列表失败: {str(e)}")
|
566 |
+
# return ["获取模型列表失败"]
|
567 |
+
|
568 |
+
# with gr.Group(visible=False) as openai_llm_group:
|
569 |
+
# openai_model = gr.Radio(
|
570 |
+
#choices=fetch_openai_models(), # 从 API 获取模型列表
|
571 |
+
# choices=["o1-mini", "o1-preview", "gpt-4o-mini", "gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-4-turbo-2024-04-09"],
|
572 |
+
# value="gpt-4o-mini",
|
573 |
+
# label="Openai 文本生成模型",
|
574 |
+
# info="默认为 gpt-4o-mini"
|
575 |
+
# )
|
576 |
+
|
577 |
+
# 添加获取当前有效模型的函数
|
578 |
+
# def get_active_model(api_key_label, gemini_model, openai_model):
|
579 |
+
# if api_key_label == "GEMINI_API_KEY":
|
580 |
+
# return gemini_model
|
581 |
+
# else: # OPENAI_API_KEY
|
582 |
+
# return openai_model
|
583 |
+
|
584 |
+
# 添加更新可见性的函数
|
585 |
+
# def update_llm_options(api_key_label):
|
586 |
+
# if api_key_label == "GEMINI_API_KEY":
|
587 |
+
# return gr.update(visible=True), gr.update(visible=False)
|
588 |
+
# else: # OPENAI_API_KEY
|
589 |
+
# return gr.update(visible=False), gr.update(visible=True)
|
590 |
+
|
591 |
+
# 添加事件监听
|
592 |
+
# api_key_label.change(
|
593 |
+
# fn=update_llm_options,
|
594 |
+
# inputs=[api_key_label],
|
595 |
+
# outputs=[gemini_llm_group, openai_llm_group]
|
596 |
# )
|
597 |
|
598 |
# Output Section
|
|
|
618 |
text_input, urls_input, pdf_files, image_files,
|
619 |
gemini_key, openai_key, openai_base_url,
|
620 |
elevenlabs_key,
|
621 |
+
max_num_chunks, min_chunk_size, conversation_style,
|
622 |
roles_person1, roles_person2,
|
623 |
dialogue_structure, podcast_name,
|
624 |
podcast_tagline, output_language, tts_model,
|
625 |
creativity_level, user_instructions,
|
626 |
engagement_techniques, tts_openai_question, tts_openai_answer, ending_message,
|
627 |
+
longform, llm_model_name, #api_key_label, gemini_model, openai_model,
|
628 |
],
|
629 |
outputs=audio_output
|
630 |
)
|