# Speech2MSummary — app.py
# Author: John Liao
# Last change: "Update app.py" (commit 5cfb839, verified)
import subprocess
import sys
import os
from pydub import AudioSegment
# Ensure the OpenAI SDK is importable: try the normal import first, and
# fall back to installing it at runtime with pip (useful on hosted
# environments where requirements.txt may be incomplete).
try:
    import openai
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "openai"])
    import openai  # Import the library after installing it
def transcribe(filename, key):
    """Transcribe one audio file with OpenAI Whisper.

    Args:
        filename: Path to the audio file to upload.
        key: OpenAI API key.

    Returns:
        The transcript as plain text (``response_format="text"``).
    """
    client = openai.OpenAI(api_key=key)
    # Context manager guarantees the file handle is closed even if the
    # API call raises (the original leaked the open handle).
    with open(filename, "rb") as audio_file:
        transcript_txt = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            response_format="text",
        )
    return transcript_txt
def openai_api(prompt, key):
    """Send *prompt* to the GPT-4o chat endpoint and return the reply text.

    Args:
        prompt: The user message to send.
        key: OpenAI API key.

    Returns:
        The assistant's reply content as a string.
    """
    # Use an explicit client — consistent with transcribe() — instead of
    # mutating the module-level openai.api_key global.
    client = openai.OpenAI(api_key=key)
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content
def transcribe_large_audio(filename, key, segment_length_ms=30 * 60 * 1000):
    """Transcribe an audio file, splitting it first when it exceeds the
    OpenAI upload limit (25 MB).

    Args:
        filename: Path to the audio file (assumed MP3 — TODO confirm
            other formats are not uploaded by the UI).
        key: OpenAI API key.
        segment_length_ms: Chunk length in milliseconds (default: 30 min).

    Returns:
        The concatenated transcript text.
    """

    def _file_size_mb(path):
        # Size on disk in mebibytes.
        return os.path.getsize(path) / (1024 * 1024)

    def _split_audio(path, chunk_ms):
        # Re-encode each chunk at a low bitrate (36 kbps) so every part
        # stays well under the API's 25 MB upload limit.
        # Bug fix: read `path` (the parameter) — the original read the
        # outer `filename` via closure, ignoring its own argument.
        audio = AudioSegment.from_file(path, format="mp3")
        parts = []
        for start in range(0, len(audio), chunk_ms):
            end = min(start + chunk_ms, len(audio))
            part_name = f"{path}_part{len(parts) + 1}.mp3"
            audio[start:end].export(part_name, format="mp3", bitrate="36k")
            parts.append(part_name)
        return parts

    if _file_size_mb(filename) > 25:
        # Split the audio file if it exceeds the API upload limit.
        # Bug fix: forward segment_length_ms — the caller's value was
        # previously ignored and the inner default always used.
        audio_chunks = _split_audio(filename, segment_length_ms)
        transcript_txt = ""
        for chunk_filename in audio_chunks:
            try:
                transcript_txt += transcribe(chunk_filename, key)
            finally:
                # Always remove the temporary chunk file, even when the
                # API call fails partway through.
                os.remove(chunk_filename)
    else:
        transcript_txt = transcribe(filename, key)
    return transcript_txt
def setup_gradio_interface():
    """Build the Gradio UI: upload audio -> transcript -> summary.

    Returns:
        The assembled ``gr.Blocks`` demo; call ``.launch()`` to serve it.
    """
    with gr.Blocks() as demo:
        gr.Markdown("音頻轉文字,並擷取重點")
        with gr.Tab("請依順序操作"):
            with gr.Row():
                file_input = gr.File(label="第一步:請上傳檔案")
                api_key_input = gr.Textbox(label="第二步:請輸入OpenAI API金鑰", placeholder="OpenAI API Key")
                submit_button = gr.Button("第三步:開始轉譯")
                file_output_txt = gr.File(label="第四步:下載逐字稿(Optional)")
            with gr.Row():
                content = gr.Textbox(label="第五步:檢視轉譯逐字稿", value="轉譯逐字稿")
                submit2_button = gr.Button("第六步:開始重點摘錄")
                summary = gr.Textbox(label="第七步:輸出重點摘錄(Markdown格式)", value="重點摘錄")
                file_output2_txt = gr.File(label="第八步:下載重點摘錄(Optional)")

        def transcribe_and_download(file, key):
            # Transcribe the uploaded file and persist the transcript so
            # the user can download it.
            if file is None:
                # Bug fix: the original fell through and returned None,
                # which Gradio cannot unpack into the two outputs.
                return "", None
            txt_content = transcribe_large_audio(file, key)
            txt_path = "transcribe.txt"
            with open(txt_path, "w") as txt_file:
                txt_file.write(txt_content)
            return txt_content, txt_path

        def transcribe_and_summary(text, key):
            # Summarize the transcript via GPT-4o and persist the result.
            if text is None:
                # Bug fix: same missing-return problem as above.
                return "", None
            prompt = "請扮演文書處理專家,幫我把「會議逐字稿」作「重點摘錄」,逐字稿如下:" + text
            # Renamed local: the original's `summary` shadowed the outer
            # Textbox component of the same name.
            summary_text = openai_api(prompt, key)
            txt_path = "summary.txt"
            with open(txt_path, "w") as txt_file:
                txt_file.write(summary_text)
            return summary_text, txt_path

        submit_button.click(
            transcribe_and_download,
            inputs=[file_input, api_key_input],
            outputs=[content, file_output_txt],
        )
        submit2_button.click(
            transcribe_and_summary,
            inputs=[content, api_key_input],
            outputs=[summary, file_output2_txt],
        )
    return demo
# Ensure gradio is importable, installing it on the fly when missing —
# the same bootstrap pattern used for openai at the top of the file.
try:
    import gradio as gr
except ImportError:
    # Bug fix: the original except branch simply re-imported gradio
    # (which would raise ImportError again) instead of installing it.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio"])
    import gradio as gr

# Run the interface only when executed as a script, not on import.
if __name__ == "__main__":
    demo = setup_gradio_interface()
    demo.launch()