# -*- coding: utf-8 -*-
# 財政部財政資訊中心 江信宗
import gradio as gr
import openai
import os
import re
from pydub import AudioSegment
import uuid
import edge_tts
import json
def create_client(api_key=None):
    """Build an OpenAI-compatible client bound to the SambaNova endpoint.

    Args:
        api_key: Key supplied by the caller. When it is empty or None, the
            ``YOUR_API_TOKEN`` environment variable is used instead.

    Returns:
        An ``openai.OpenAI`` client whose base URL is
        ``https://api.sambanova.ai/v1``.
    """
    # Whitespace around a pasted key would otherwise be sent verbatim.
    supplied_key = api_key.strip() if isinstance(api_key, str) else api_key
    # Resolve the key locally instead of storing it on the `openai` module:
    # a module attribute is process-wide state shared by every request, and
    # the key is passed to the client explicitly anyway.
    resolved_key = supplied_key or os.getenv("YOUR_API_TOKEN")
    return openai.OpenAI(api_key=resolved_key, base_url="https://api.sambanova.ai/v1")
def _extract_podcast_json(reply):
    """Return the JSON object text embedded in *reply*, or None if none parses."""
    if not reply:
        return None
    match = re.search(r'{.*}', reply, re.DOTALL)
    if match is None:
        return None
    candidate = match.group(0)
    try:
        json.loads(candidate)
    except json.JSONDecodeError:
        # Models often leave a trailing comma before a closing brace/bracket.
        candidate = re.sub(r',\s*}', '}', candidate)
        candidate = re.sub(r',\s*]', ']', candidate)
        try:
            json.loads(candidate)
        except json.JSONDecodeError:
            return None
    return candidate


def generate_response(input_text, language, speaker1, speaker2, api_key):
    """Ask the LLM for a two-speaker podcast script and return it as JSON text.

    Args:
        input_text: Source material for the podcast; sent as the user message.
        language: Output language label, or "Auto Detect" to follow the
            language of the input.
        speaker1: Host voice label; the part before " - " is used as the name.
        speaker2: Guest voice label; the part before " - " is used as the name.
        api_key: API key forwarded to ``create_client``; may be empty.

    Returns:
        The JSON object text found in the model reply. Trailing commas before
        ``}`` or ``]`` are removed when the raw text does not parse.

    Raises:
        gr.Error: If the request fails (invalid key, rate limit, or any other
            error) or the reply contains no parseable JSON object.
    """
    speaker1_name = speaker1.split(' - ')[0]
    speaker2_name = speaker2.split(' - ')[0]
    if language == "Auto Detect":
        language_instruction = "- The podcast MUST be in the same language as the user input."
        # Avoid rendering the nonsensical "Auto Detect language" in the prompt.
        language_requirement = "the same language as the user input"
    else:
        language_instruction = f"- The podcast Must reply to me in {language} language."
        language_requirement = f"{language} language"
    example = """
{
"topic": "AGI",
"podcast": [
{
"speaker": 1,
"line": "Welcome to the 財資歐北共 Podcast. I am the host {speaker1_name}. Today we have invited an expert {speaker2_name} to join our program despite his busy schedule."
},
{
"speaker": 2,
"line": "Hello everyone, I am {speaker2_name}, I am honored to come and chat with you."
},
{
"speaker": 1,
"line": "Today we will discuss a very interesting topic..."
},
{
"speaker": 2,
"line": "Yes, this topic is indeed fascinating. Let's start with..."
},
…………,
{
"speaker": 1,
"line": "Thank you {speaker2_name} for your professional sharing. Welcome to subscribe to the Wishing Podcast. Thank you and goodbye."
}
]
}
"""
    # The example is a plain string (its JSON braces would clash with f-string
    # syntax), so the speaker-name placeholders are filled in explicitly.
    example = example.replace("{speaker1_name}", speaker1_name)
    example = example.replace("{speaker2_name}", speaker2_name)
    # NOTE: `{{input_text}}` renders as the literal text `{input_text}`; the
    # actual input is delivered to the model in the user message below.
    system_prompt = f"""你的任務是將提供的輸入文字轉換為一個引人入勝、訊息豐富且專業的Podcast對話。輸入文字可能會比較混亂或結構不完整,因為它可能來自不同來源,如PDF檔案或文字檔等。不要擔心格式問題或任何不相關的訊息;你的目標是提取可以在Podcast中討論的關鍵點、識別重要定義,並突出有趣的事實。
以下是你將要處理的輸入文字:
{{input_text}}
首先,仔細閱讀輸入文字,找出主要話題、關鍵點,以及任何有趣的事實或軼事。思考如何將這些訊息以一種有趣且吸引人的方式呈現出來,適合高質量的音訊Podcast。
頭腦風暴一些創造性的方法來討論你在輸入文字中識別出的主要話題、關鍵點及任何有趣的事實或軼事。可以考慮使用類比、講故事技巧或假設情境來讓內容對聽眾更加貼近和有趣。
請記住,你的Podcast應當易於普通聽眾理解,所以避免使用過多的專業術語或假設聽眾對該話題已有瞭解。如有必要,請思考如何用簡單的術語簡要解釋任何複雜的概念。
利用你的想像力填補輸入文字中的任何空白,或者想出一些值得探討與發人深省的問題,以供Podcast討論。目標是創造一個訊息豐富且娛樂性強的對話,因此可以在你的方法上大膽自由發揮創意。
將你的頭腦風暴想法和Podcast對話的粗略大綱寫在這裡。確保記錄下你希望在結尾重申的主要見解和要點。
現在你已經進行了頭腦風暴並建立了一個粗略的大綱,是時候撰寫實際的Podcast對話了。目標是主持人({speaker1_name})與嘉賓({speaker2_name})之間自然、對話式的交流。融入你在頭腦風暴中得出的最佳想法,並確保將任何複雜話題以易於理解的方式解釋清楚。
{language_instruction}
- The podcast should have 2 speakers: {speaker1_name} and {speaker2_name}.
- The podcast should be long.
- The podcast should be interesting, lively, and engaging, and hook the listener from the start.
- The script must be in JSON format.
Follow this JSON example structure, MUST be in {language_requirement}:
{example}
根據你在頭腦風暴階段提出的關鍵點和創造性想法,撰寫你的引人入勝、訊息豐富的Podcast對話。採用對話式的語氣,並包括任何必要的上下文或解釋,使內容對一般聽眾而言容易理解。使用主持人名字 {speaker1_name} 和嘉賓名字 {speaker2_name},以營造更吸引人和身臨其境的聆聽體驗。不要包括像[主持人]或[嘉賓]這樣的括號預留位置。設計你的輸出內容以供直接朗讀——它將直接轉換為音訊。
確保對話儘可能詳細、完整,同時保持在主題之內並維持吸引人的流暢性。目標是使用你的全部輸出容量,建立儘可能長的Podcast節目,同時以有趣的方式傳遞輸入文字中的關鍵訊息。
在對話結束時,讓主持人和嘉賓自然總結他們討論中的主要見解和要點。這應當是對話的隨機部分,以自然隨意而非明顯的總結——目的是在結束前最後一次以自然流暢的方式強化核心思想。最終以感謝詞結束。
"""
    # The request itself must sit inside the try block: authentication and
    # rate-limit failures are raised by the API call, not by JSON parsing.
    try:
        client = create_client(api_key)
        response = client.chat.completions.create(
            model="Meta-Llama-3.1-405B-Instruct",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": input_text}
            ],
            temperature=1
        )
        reply = response.choices[0].message.content
    except Exception as e:
        message = str(e)
        if isinstance(e, openai.AuthenticationError) or "API key not valid" in message:
            raise gr.Error("無效的 API 金鑰!!請提供有效的 API 金鑰。") from e
        if isinstance(e, openai.RateLimitError) or "rate limit" in message.lower():
            raise gr.Error("API 金鑰使用額度已超過限制!!請稍後再試或使用其他 API 金鑰。") from e
        raise gr.Error("生成 Podcast 劇本失敗!!請稍後再試。") from e

    podcast_json = _extract_podcast_json(reply)
    if podcast_json is None:
        # No JSON object in the reply, or it stays invalid after cleanup.
        raise gr.Error("生成 Podcast 劇本失敗!!請稍後再試。")
    return podcast_json
async def tts_generate(input_text, speaker1, speaker2):
    """Synthesize a podcast script into one MP3 file with edge-tts.

    Args:
        input_text: Podcast script as JSON text holding a "podcast" list of
            ``{"speaker": ..., "line": ...}`` entries.
        speaker1: Voice label used for entries whose speaker is 1.
        speaker2: Voice label used for every other entry.

    Returns:
        Path of the exported MP3 file, written relative to the current
        working directory.

    Raises:
        KeyError: If a voice label is missing from the voice table.
        json.JSONDecodeError: If the script is not valid JSON even after
            trailing commas are removed.
    """
    voice_names = {
        "家豪 - 臺灣國語 (Male)": "zh-TW-YunJheNeural",
        "淑芬 - 臺灣國語 (Female)": "zh-TW-HsiaoChenNeural",
        "子晴 - 臺灣國語 (Female)": "zh-TW-HsiaoYuNeural",
        "品妍 - 中文 (Female)": "zh-CN-XiaoxiaoNeural",
        "美玲 - 中文 (Female)": "zh-CN-XiaoyiNeural",
        "建宏 - 中文 (Male)": "zh-CN-YunjianNeural",
        "品睿 - 中文 (Male)": "zh-CN-YunxiNeural",
        "宥廷 - 中文 (Male)": "zh-CN-YunxiaNeural",
        "志明 - 中文 (Male)": "zh-CN-YunyangNeural",
        "雨霏 - 中文 (Female)": "zh-CN-liaoning-XiaobeiNeural",
        "Andrew - English (Male)": "en-US-AndrewMultilingualNeural",
        "Ava - English (Female)": "en-US-AvaMultilingualNeural",
        "Brian - English (Male)": "en-US-BrianMultilingualNeural",
        "Emma - English (Female)": "en-US-EmmaMultilingualNeural",
        "Florian - German (Male)": "de-DE-FlorianMultilingualNeural",
        "Seraphina - German (Female)": "de-DE-SeraphinaMultilingualNeural",
        "Remy - French (Male)": "fr-FR-RemyMultilingualNeural",
        "Vivienne - French (Female)": "fr-FR-VivienneMultilingualNeural"
    }
    speaker1_voice = voice_names[speaker1]
    speaker2_voice = voice_names[speaker2]
    speaker1_name = speaker1.split(' - ')[0]
    speaker2_name = speaker2.split(' - ')[0]
    try:
        podcast_dict = json.loads(input_text)
    except json.JSONDecodeError:
        # Retry after removing trailing commas before a closing brace/bracket.
        cleaned_input = re.sub(r',\s*}', '}', input_text)
        cleaned_input = re.sub(r',\s*]', ']', cleaned_input)
        podcast_dict = json.loads(cleaned_input)
    combined = AudioSegment.empty()
    for line in podcast_dict.get("podcast", []):
        if not isinstance(line, dict):
            # A malformed entry should not abort the whole episode.
            continue
        speaker = line.get("speaker")
        text = line.get("line", "")
        if not text or not str(text).strip():
            # Nothing to speak; skip instead of sending blank text to TTS.
            continue
        # Accept the speaker id as a number or as the string "1".
        is_host = speaker == 1 or str(speaker).strip() == "1"
        voice = speaker1_voice if is_host else speaker2_voice
        voice_name = speaker1_name if is_host else speaker2_name
        communicate = edge_tts.Communicate(text, voice)
        audio_file = f"{voice_name}_{uuid.uuid4()}.mp3"
        try:
            await communicate.save(audio_file)
            combined += AudioSegment.from_mp3(audio_file)
        finally:
            # Always remove the per-line temp file, even when synthesis or
            # decoding fails; it may not exist if save() failed early.
            try:
                os.remove(audio_file)
            except FileNotFoundError:
                pass
    output_file = f"Jiangxz_{uuid.uuid4()}.mp3"
    combined.export(output_file, format="mp3")
    return output_file
async def process_podcast(input_text, language, speaker1, speaker2, api_key):
    """Generate the podcast script, then synthesize it to audio.

    Returns:
        A ``(script, audio_path)`` tuple: the JSON script text produced by
        ``generate_response`` and the file path returned by ``tts_generate``.
    """
    script = generate_response(
        input_text,
        language,
        speaker1,
        speaker2,
        api_key,
    )
    return script, await tts_generate(script, speaker1, speaker2)
# Stylesheet handed to gr.Blocks(css=...). The class selectors defined here
# (center-aligned, gen-button, input-background, lng-background,
# sk1-background, sk2-background, api-background, audio-background) are
# attached to components through their `elem_classes` arguments.
custom_css = """
body {
background-color: #f0f8ff;
}
.gradio-container {
background-color: #f0f8ff;
border-radius: 20px;
box-shadow: 0 0 20px rgba(0,0,0,0.1);
}
.center-aligned {
text-align: center !important;
color: #ff4081;
text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
}
.gen-button {
border-radius: 10px !important;
background-color: #ff4081 !important;
color: white !important;
font-weight: bold !important;
transition: all 0.3s ease !important;
}
.gen-button:hover {
background-color: #f50057 !important;
transform: scale(1.05);
}
.gr-input, .gr-box, .gr-dropdown {
border-radius: 10px !important;
border: 2px solid #ff4081 !important;
}
.gr-input:focus, .gr-box:focus, .gr-dropdown:focus {
border-color: #f50057 !important;
box-shadow: 0 0 0 2px rgba(245,0,87,0.2) !important;
}
.input-background {
background-color: #B7E0FF !important;
padding: 15px !important;
border-radius: 10px !important;
}
.lng-background {
background-color: #FFF5CD !important;
padding: 5px !important;
border-radius: 10px !important;
}
.sk1-background {
background-color: #FFF5CD !important;
padding: 5px !important;
border-radius: 10px !important;
}
.sk2-background {
background-color: #FFF5CD !important;
padding: 5px !important;
border-radius: 10px !important;
}
.api-background {
background-color: #FFCFB3 !important;
padding: 5px !important;
border-radius: 10px !important;
}
.audio-background {
background-color: #E78F81 !important;
padding: 5px !important;
border-radius: 10px !important;
}
"""
# Gradio UI definition. Components are created in the order they should be
# rendered, so statement order here is significant.
# NOTE(review): block nesting was reconstructed from the `with` statements
# and the `scale=` arguments — confirm against the intended layout.
with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
    gr.Markdown("# 🎙️ 自動生成 Podcast 節目及音檔。系統布署:江信宗。 🎙️", elem_classes="center-aligned")
    # Free-text topic / source material for the podcast.
    input_text = gr.Textbox(label="請輸入 Podcast 話題(建議50~500字之間)", placeholder="輸入 Podcast 話題內容,越詳細劇本越佳......", elem_classes="input-background")
    # First row: output language plus the two speaker voices.
    with gr.Row():
        Language = gr.Dropdown(
            choices=["繁體中文", "Auto Detect", "English", "日本語", "한국어"],
            value="繁體中文",
            label="語言",
            interactive=True,
            scale=1,
            elem_classes="lng-background"
        )
        # Labels must match the keys of the voice table in tts_generate,
        # which looks the selected label up directly.
        speaker_choices = [
            "家豪 - 臺灣國語 (Male)",
            "淑芬 - 臺灣國語 (Female)",
            "子晴 - 臺灣國語 (Female)",
            "品睿 - 中文 (Male)",
            "品妍 - 中文 (Female)",
            "志明 - 中文 (Male)",
            "美玲 - 中文 (Female)",
            "建宏 - 中文 (Male)",
            "宥廷 - 中文 (Male)",
            "雨霏 - 中文 (Female)",
            "Andrew - English (Male)",
            "Ava - English (Female)",
            "Brian - English (Male)",
            "Emma - English (Female)",
            "Florian - German (Male)",
            "Seraphina - German (Female)",
            "Remy - French (Male)",
            "Vivienne - French (Female)"
        ]
        # Host voice.
        Speaker_1 = gr.Dropdown(
            choices=speaker_choices,
            value="家豪 - 臺灣國語 (Male)",
            label="主持人的語音",
            interactive=True,
            scale=2,
            elem_classes="sk1-background"
        )
        # Guest voice.
        Speaker_2 = gr.Dropdown(
            choices=speaker_choices,
            value="品妍 - 中文 (Female)",
            label="來賓的語音",
            interactive=True,
            scale=2,
            elem_classes="sk2-background"
        )
    # Second row: generate button and the API key field (masked input).
    with gr.Row():
        generate_button = gr.Button("生成", scale=2, elem_classes="gen-button")
        api_key = gr.Textbox(label="API Key", type="password", placeholder="API authentication key for large language models", scale=1, elem_classes="api-background")
    # Outputs: synthesized audio and the generated script text.
    audio_output = gr.Audio(label="Generated Podcast Audio", elem_classes="audio-background")
    podcast_script = gr.Textbox(label="Generated Podcast 劇本")
    # process_podcast returns (script, audio path), matching the outputs order.
    generate_button.click(fn=process_podcast, inputs=[input_text, Language, Speaker_1, Speaker_2, api_key], outputs=[podcast_script, audio_output])
if __name__ == "__main__":
    # When SPACE_ID is present in the environment (presumably a hosted
    # Space — confirm), launch with defaults; otherwise request a public
    # share link and hide the API page.
    launch_options = {} if "SPACE_ID" in os.environ else {"share": True, "show_api": False}
    iface.launch(**launch_options)