deeme committed on
Commit
2cc3649
·
verified ·
1 Parent(s): 625933e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -43
app.py CHANGED
@@ -5,6 +5,9 @@ import logging
5
  from podcastfy.client import generate_podcast
6
  from dotenv import load_dotenv
7
 
 
 
 
8
  # Configure logging
9
  logging.basicConfig(level=logging.DEBUG)
10
  logger = logging.getLogger(__name__)
@@ -48,7 +51,18 @@ VOICE_OPTIONS = [
48
  {"id": "shimmer", "name": "shimmer"},
49
  ]
50
 
 
 
 
 
 
 
 
 
 
51
  def get_api_key(key_name, ui_value):
 
 
52
  return ui_value if ui_value else os.getenv(key_name)
53
 
54
  def process_inputs(
@@ -60,7 +74,8 @@ def process_inputs(
60
  openai_key,
61
  openai_base_url, # 新增参数
62
  elevenlabs_key,
63
- word_count,
 
64
  conversation_style,
65
  roles_person1,
66
  roles_person2,
@@ -75,6 +90,11 @@ def process_inputs(
75
  tts_openai_question,
76
  tts_openai_answer,
77
  ending_message,
 
 
 
 
 
78
  ):
79
  try:
80
  logger.info("Starting podcast generation process")
@@ -82,14 +102,20 @@ def process_inputs(
82
  # API key handling
83
  logger.debug("Setting API keys")
84
  os.environ["GEMINI_API_KEY"] = get_api_key("GEMINI_API_KEY", gemini_key)
85
-
 
 
 
 
 
 
 
 
86
  if tts_model == "openai":
87
- logger.debug("Setting OpenAI API key")
88
- if not openai_key and not os.getenv("OPENAI_API_KEY"):
89
- raise ValueError("OpenAI API key is required when using OpenAI TTS model")
90
- os.environ["OPENAI_API_KEY"] = get_api_key("OPENAI_API_KEY", openai_key)
91
- if openai_base_url:
92
- os.environ["OPENAI_API_BASE"] = openai_base_url
93
 
94
  if tts_model == "elevenlabs":
95
  logger.debug("Setting ElevenLabs API key")
@@ -151,7 +177,8 @@ def process_inputs(
151
  # Prepare conversation config
152
  logger.debug("Preparing conversation config")
153
  conversation_config = {
154
- "word_count": word_count,
 
155
  "conversation_style": conversation_style.split(','),
156
  "roles_person1": roles_person1,
157
  "roles_person2": roles_person2,
@@ -186,6 +213,10 @@ def process_inputs(
186
  image_paths=image_paths if image_paths else None,
187
  tts_model=tts_model,
188
  conversation_config=conversation_config,
 
 
 
 
189
  )
190
 
191
  logger.info("Podcast generation completed")
@@ -254,7 +285,7 @@ with gr.Blocks(
254
  label="Gemini API Key",
255
  type="password",
256
  value="",
257
- info="必须的"
258
  )
259
  openai_key = gr.Textbox(
260
  label="OpenAI API Key",
@@ -332,13 +363,45 @@ with gr.Blocks(
332
  </h3>
333
  """,
334
  )
335
- word_count = gr.Slider(
336
- minimum=500,
337
- maximum=5000,
338
- value=2000,
339
- step=100,
340
- label="字数统计",
341
- info="目标字数(用于生成内容)学术辩论:3000。讲故事:1000"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  )
343
 
344
  conversation_style = gr.Textbox(
@@ -414,12 +477,6 @@ with gr.Blocks(
414
  info="播客使用的语言"
415
  )
416
 
417
- # longform = gr.Checkbox(
418
- # label="长篇模式",
419
- # value=False,
420
- # info="启用长篇内容生成模式"
421
- # )
422
-
423
  # Voice Settings
424
  gr.Markdown(
425
  """
@@ -434,22 +491,35 @@ with gr.Blocks(
434
  info="结束语"
435
  )
436
  tts_model = gr.Radio(
437
- choices=["openai", "elevenlabs", "edge"],
438
  value="openai",
439
  label="文本转语音模型",
440
  info="选择语音合成模型 (edge 免费但音质较差, 其他模型音质更好但需申请 API keys)"
441
  )
442
- tts_openai_question = gr.Dropdown(
443
- choices={voice["name"]: voice["id"] for voice in VOICE_OPTIONS},
444
- value=VOICE_OPTIONS[27]["id"], # 默认选择选项
445
- label="OpenAI TTS 主持人",
446
- info="选择OpenAI TTS 主持人角色语音"
447
- )
448
- tts_openai_answer = gr.Dropdown(
449
- choices={voice["name"]: voice["id"] for voice in VOICE_OPTIONS},
450
- value=VOICE_OPTIONS[31]["id"], # 默认选择选项
451
- label="OpenAI TTS 嘉宾",
452
- info="选择OpenAI TTS 嘉宾角色语音"
 
 
 
 
 
 
 
 
 
 
 
 
 
453
  )
454
 
455
  # Advanced Settings
@@ -469,16 +539,60 @@ with gr.Blocks(
469
  info="一些额外的指令,用来帮助AI更好地理解你想要聊天的内容和方向"
470
  )
471
 
472
- # api_key_label = gr.Textbox(
473
- # label="自定义基于云的 LLM",
474
  # value="GEMINI_API_KEY",
475
- # info="可选,默认使用 Gemini,如使用 OPENAI,上面填入 'OPENAI_API_KEY' 并保证设置好环境变量且设置好下面的模型"
 
476
  # )
477
 
478
- # llm_model_name = gr.Textbox(
479
- # label="设置好对应自定义基于云的 LLM 模型",
480
- # value="gemini-1.5-pro-latest",
481
- # info="可选,配合上面的参数,默认是 Gemini 的 gemini-1.5-pro-latest,默认 OPENAI 可支持模型 api.168369.xyz/v1/models 获取"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
482
  # )
483
 
484
  # Output Section
@@ -504,12 +618,13 @@ with gr.Blocks(
504
  text_input, urls_input, pdf_files, image_files,
505
  gemini_key, openai_key, openai_base_url,
506
  elevenlabs_key,
507
- word_count, conversation_style,
508
  roles_person1, roles_person2,
509
  dialogue_structure, podcast_name,
510
  podcast_tagline, output_language, tts_model,
511
  creativity_level, user_instructions,
512
  engagement_techniques, tts_openai_question, tts_openai_answer, ending_message,
 
513
  ],
514
  outputs=audio_output
515
  )
 
5
  from podcastfy.client import generate_podcast
6
  from dotenv import load_dotenv
7
 
8
+ import requests
9
+ import json
10
+
11
  # Configure logging
12
  logging.basicConfig(level=logging.DEBUG)
13
  logger = logging.getLogger(__name__)
 
51
  {"id": "shimmer", "name": "shimmer"},
52
  ]
53
 
54
+ # 添加 API Keys 轮询功能
55
+ def get_next_gemini_key(api_keys):
56
+ keys = [k.strip() for k in api_keys.split(',') if k.strip()]
57
+ if not hasattr(get_next_gemini_key, 'current_index'):
58
+ get_next_gemini_key.current_index = 0
59
+ key = keys[get_next_gemini_key.current_index]
60
+ get_next_gemini_key.current_index = (get_next_gemini_key.current_index + 1) % len(keys)
61
+ return key
62
+
63
  def get_api_key(key_name, ui_value):
64
+ if key_name == "GEMINI_API_KEY" and ui_value and ',' in ui_value:
65
+ return get_next_gemini_key(ui_value)
66
  return ui_value if ui_value else os.getenv(key_name)
67
 
68
  def process_inputs(
 
74
  openai_key,
75
  openai_base_url, # 新增参数
76
  elevenlabs_key,
77
+ max_num_chunks,
78
+ min_chunk_size,
79
  conversation_style,
80
  roles_person1,
81
  roles_person2,
 
90
  tts_openai_question,
91
  tts_openai_answer,
92
  ending_message,
93
+ longform,
94
+ llm_model_name,
95
+ #api_key_label,
96
+ #gemini_model,
97
+ #openai_model,
98
  ):
99
  try:
100
  logger.info("Starting podcast generation process")
 
102
  # API key handling
103
  logger.debug("Setting API keys")
104
  os.environ["GEMINI_API_KEY"] = get_api_key("GEMINI_API_KEY", gemini_key)
105
+
106
+ logger.debug("Setting OpenAI API key")
107
+ if not openai_key and not os.getenv("OPENAI_API_KEY"):
108
+ raise ValueError("OpenAI API key is required when using OpenAI TTS model")
109
+ os.environ["OPENAI_API_KEY"] = get_api_key("OPENAI_API_KEY", openai_key)
110
+
111
+ # if api_key_label == "OPENAI_API_KEY":
112
+ os.environ["OPENAI_API_BASE"] = get_api_key("OPENAI_BASE_URL", openai_base_url)
113
+
114
  if tts_model == "openai":
115
+ os.environ["OPENAI_BASE_URL"] = get_api_key("OPENAI_BASE_URL", openai_base_url)
116
+ # 根据选择的名称找到对应的 voice ID
117
+ tts_openai_question = next(voice["id"] for voice in VOICE_OPTIONS if voice["name"] == tts_openai_question)
118
+ tts_openai_answer = next(voice["id"] for voice in VOICE_OPTIONS if voice["name"] == tts_openai_answer)
 
 
119
 
120
  if tts_model == "elevenlabs":
121
  logger.debug("Setting ElevenLabs API key")
 
177
  # Prepare conversation config
178
  logger.debug("Preparing conversation config")
179
  conversation_config = {
180
+ "max_num_chunks": max_num_chunks,
181
+ "min_chunk_size": min_chunk_size,
182
  "conversation_style": conversation_style.split(','),
183
  "roles_person1": roles_person1,
184
  "roles_person2": roles_person2,
 
213
  image_paths=image_paths if image_paths else None,
214
  tts_model=tts_model,
215
  conversation_config=conversation_config,
216
+ longform=longform,
217
+ llm_model_name=llm_model_name,
218
+ api_key_label="OPENAI_API_KEY",
219
+ #llm_model_name=get_active_model(api_key_label, gemini_model, openai_model),
220
  )
221
 
222
  logger.info("Podcast generation completed")
 
285
  label="Gemini API Key",
286
  type="password",
287
  value="",
288
+ info="必须的,多个key请用逗号分隔"
289
  )
290
  openai_key = gr.Textbox(
291
  label="OpenAI API Key",
 
363
  </h3>
364
  """,
365
  )
366
+ llm_model_name = gr.Radio(
367
+ choices=["gemini-1.5-pro-latest", "gemini-exp-1121", "learnlm-1.5-pro-experimental", "o1-mini", "o1-preview", "gpt-4o-mini", "gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-4-turbo-2024-04-09", "claude-3-5-sonnet-20240620", "claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022"],
368
+ value="gemini-1.5-pro-latest",
369
+ label="文本生成模型",
370
+ info="默认使用 gemini-1.5-pro-latest "
371
+ )
372
+
373
+ longform = gr.Checkbox(
374
+ label="长篇模式",
375
+ value=False,
376
+ info="启用长篇内容生成模式,启用长篇需要Google Cloud支持,设置好GOOGLE_API_KEY"
377
+ )
378
+ with gr.Group(visible=False) as longform_settings_group:
379
+ max_num_chunks = gr.Slider(
380
+ minimum=1,
381
+ maximum=20,
382
+ value=8,
383
+ step=1,
384
+ label="最大轮数",
385
+ info="长篇模式下,生成的最大轮数"
386
+ )
387
+ min_chunk_size = gr.Slider(
388
+ minimum=300,
389
+ maximum=2000,
390
+ value=600,
391
+ step=100,
392
+ label="一轮最小字符数",
393
+ info="长篇模式下,生成一轮所需的最小字符数"
394
+ )
395
+
396
+ # 添加更新可见性的函数
397
+ def update_longform_settings(is_longform):
398
+ return gr.update(visible=is_longform)
399
+
400
+ # 添加事件监听
401
+ longform.change(
402
+ fn=update_longform_settings,
403
+ inputs=[longform],
404
+ outputs=[longform_settings_group]
405
  )
406
 
407
  conversation_style = gr.Textbox(
 
477
  info="播客使用的语言"
478
  )
479
 
 
 
 
 
 
 
480
  # Voice Settings
481
  gr.Markdown(
482
  """
 
491
  info="结束语"
492
  )
493
  tts_model = gr.Radio(
494
+ choices=["openai", "geminimulti", "elevenlabs", "gemini", "edge"],
495
  value="openai",
496
  label="文本转语音模型",
497
  info="选择语音合成模型 (edge 免费但音质较差, 其他模型音质更好但需申请 API keys)"
498
  )
499
+ with gr.Group(visible=True) as openai_voice_group:
500
+ tts_openai_question = gr.Dropdown(
501
+ choices=[voice["name"] for voice in VOICE_OPTIONS],
502
+ value=VOICE_OPTIONS[27]["name"],
503
+ label="OpenAI TTS 主持人",
504
+ info="选择OpenAI TTS 主持人角色语音"
505
+ )
506
+
507
+ tts_openai_answer = gr.Dropdown(
508
+ choices=[voice["name"] for voice in VOICE_OPTIONS],
509
+ value=VOICE_OPTIONS[31]["name"],
510
+ label="OpenAI TTS 嘉宾",
511
+ info="选择OpenAI TTS 嘉宾角色语音"
512
+ )
513
+
514
+ # 添加更新可见性的函数
515
+ def update_voice_options(tts_model):
516
+ return gr.update(visible=(tts_model == "openai"))
517
+
518
+ # 添加事件监听
519
+ tts_model.change(
520
+ fn=update_voice_options,
521
+ inputs=[tts_model],
522
+ outputs=[openai_voice_group]
523
  )
524
 
525
  # Advanced Settings
 
539
  info="一些额外的指令,用来帮助AI更好地理解你想要聊天的内容和方向"
540
  )
541
 
542
+ # api_key_label = gr.Radio(
543
+ # choices=["GEMINI_API_KEY", "OPENAI_API_KEY"],
544
  # value="GEMINI_API_KEY",
545
+ # label="文本生成模型供应商",
546
+ # info="默认使用 Gemini "
547
  # )
548
 
549
+ # with gr.Group(visible=True) as gemini_llm_group:
550
+ # gemini_model = gr.Radio(
551
+ # choices=["gemini-1.5-pro-latest", "gemini-exp-1121", "learnlm-1.5-pro-experimental"],
552
+ # value="gemini-1.5-pro-latest",
553
+ # label="Gemini 文本生成模型",
554
+ # info="默认使用 gemini-1.5-pro-latest "
555
+ # )
556
+
557
+ # def fetch_openai_models():
558
+ # try:
559
+ # response = requests.get("https://api.168369.xyz/v1/models")
560
+ # data = response.json()
561
+ # 提取所有模型的 id
562
+ # model_ids = [model["id"] for model in data["data"]]
563
+ # return model_ids
564
+ # except Exception as e:
565
+ # print(f"获取模型列表失败: {str(e)}")
566
+ # return ["获取模型列表失败"]
567
+
568
+ # with gr.Group(visible=False) as openai_llm_group:
569
+ # openai_model = gr.Radio(
570
+ #choices=fetch_openai_models(), # 从 API 获取模型列表
571
+ # choices=["o1-mini", "o1-preview", "gpt-4o-mini", "gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-4-turbo-2024-04-09"],
572
+ # value="gpt-4o-mini",
573
+ # label="Openai 文本生成模型",
574
+ # info="默认为 gpt-4o-mini"
575
+ # )
576
+
577
+ # 添加获取当前有效模型的函数
578
+ # def get_active_model(api_key_label, gemini_model, openai_model):
579
+ # if api_key_label == "GEMINI_API_KEY":
580
+ # return gemini_model
581
+ # else: # OPENAI_API_KEY
582
+ # return openai_model
583
+
584
+ # 添加更新可见性的函数
585
+ # def update_llm_options(api_key_label):
586
+ # if api_key_label == "GEMINI_API_KEY":
587
+ # return gr.update(visible=True), gr.update(visible=False)
588
+ # else: # OPENAI_API_KEY
589
+ # return gr.update(visible=False), gr.update(visible=True)
590
+
591
+ # 添加事件监听
592
+ # api_key_label.change(
593
+ # fn=update_llm_options,
594
+ # inputs=[api_key_label],
595
+ # outputs=[gemini_llm_group, openai_llm_group]
596
  # )
597
 
598
  # Output Section
 
618
  text_input, urls_input, pdf_files, image_files,
619
  gemini_key, openai_key, openai_base_url,
620
  elevenlabs_key,
621
+ max_num_chunks, min_chunk_size, conversation_style,
622
  roles_person1, roles_person2,
623
  dialogue_structure, podcast_name,
624
  podcast_tagline, output_language, tts_model,
625
  creativity_level, user_instructions,
626
  engagement_techniques, tts_openai_question, tts_openai_answer, ending_message,
627
+ longform, llm_model_name, #api_key_label, gemini_model, openai_model,
628
  ],
629
  outputs=audio_output
630
  )