Spaces:
Running
Running
John Liao
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import subprocess
|
2 |
import sys
|
|
|
|
|
3 |
|
4 |
try:
|
5 |
import openai
|
@@ -25,6 +27,40 @@ def openai_api(prompt, key):
|
|
25 |
)
|
26 |
return completion.choices[0].message.content
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
def setup_gradio_interface():
|
29 |
with gr.Blocks() as demo:
|
30 |
gr.Markdown("音頻轉文字,並擷取重點")
|
@@ -37,12 +73,12 @@ def setup_gradio_interface():
|
|
37 |
with gr.Row():
|
38 |
content = gr.Textbox(label="第五步:檢視轉譯逐字稿", value="轉譯逐字稿")
|
39 |
submit2_button = gr.Button("第六步:開始重點摘錄")
|
40 |
-
summary = gr.Textbox(label="第七步:輸出重點摘錄", value="重點摘錄")
|
41 |
file_output2_txt = gr.File(label="第八步:下載重點摘錄(Optional)")
|
42 |
|
43 |
def transcribe_and_download(file, key):
|
44 |
if file is not None:
|
45 |
-
txt_content =
|
46 |
txt_path = "transcribe.txt"
|
47 |
with open(txt_path, "w") as txt_file:
|
48 |
txt_file.write(txt_content)
|
|
|
1 |
import subprocess
|
2 |
import sys
|
3 |
+
import os
|
4 |
+
from pydub import AudioSegment
|
5 |
|
6 |
try:
|
7 |
import openai
|
|
|
27 |
)
|
28 |
return completion.choices[0].message.content
|
29 |
|
30 |
+
def transcribe_large_audio(filename, key, segment_length_ms=30 * 60 * 1000):
    """Transcribe an audio file, splitting it into segments first if it is large.

    A file larger than 25 MB is cut into consecutive segments of
    ``segment_length_ms`` milliseconds. Each segment is exported as a
    36 kbit/s MP3 named ``<filename>_part<N>.mp3``, passed to ``transcribe``,
    and deleted afterwards. A smaller file is passed to ``transcribe`` as is.

    Args:
        filename: Path of the audio file to transcribe.
        key: API key; assigned to ``openai.api_key`` and forwarded to
            ``transcribe``.
        segment_length_ms: Segment length in milliseconds (default: 30
            minutes). Only used when the file has to be split.

    Returns:
        The result of ``transcribe`` for a small file, or the per-segment
        results concatenated in order for a split file.

    Raises:
        ValueError: If the file has to be split and ``segment_length_ms``
            is not a positive number.
    """

    def get_file_size_in_mb(file_path):
        """Return the size of ``file_path`` in mebibytes."""
        return os.path.getsize(file_path) / (1024 * 1024)

    def split_audio_file(file_path, segment_length_ms=30 * 60 * 1000):
        """Export ``file_path`` as consecutive MP3 segments and return their paths."""
        if segment_length_ms <= 0:
            # A negative step would make range() empty and silently produce
            # an empty transcript; fail loudly instead.
            raise ValueError("segment_length_ms must be a positive number")
        # Load from the helper's own argument rather than the enclosing
        # function's variable, so the helper does what its signature says.
        # NOTE(review): the input is always decoded as MP3 -- confirm that no
        # other upload formats are expected here.
        audio = AudioSegment.from_file(file_path, format="mp3")
        total_ms = len(audio)
        segment_filenames = []
        for part_number, start in enumerate(
            range(0, total_ms, segment_length_ms), start=1
        ):
            end = min(start + segment_length_ms, total_ms)
            segment_filename = f"{file_path}_part{part_number}.mp3"
            audio[start:end].export(segment_filename, format="mp3", bitrate="36k")
            segment_filenames.append(segment_filename)
        return segment_filenames

    openai.api_key = key

    if get_file_size_in_mb(filename) <= 25:
        return transcribe(filename, key)

    # Split the audio file because it exceeds the size threshold, honouring
    # the segment length requested by the caller.
    audio_chunks = split_audio_file(filename, segment_length_ms)
    print(audio_chunks)

    transcripts = []
    try:
        # Process each chunk separately, in order.
        for chunk_filename in audio_chunks:
            print(chunk_filename)
            transcripts.append(transcribe(chunk_filename, key))
    finally:
        # Remove every temporary chunk file, including the ones that were
        # never reached because an earlier transcription failed.
        for chunk_filename in audio_chunks:
            try:
                os.remove(chunk_filename)
            except FileNotFoundError:
                pass

    return "".join(transcripts)
|
63 |
+
|
64 |
def setup_gradio_interface():
|
65 |
with gr.Blocks() as demo:
|
66 |
gr.Markdown("音頻轉文字,並擷取重點")
|
|
|
73 |
with gr.Row():
|
74 |
content = gr.Textbox(label="第五步:檢視轉譯逐字稿", value="轉譯逐字稿")
|
75 |
submit2_button = gr.Button("第六步:開始重點摘錄")
|
76 |
+
summary = gr.Textbox(label="第七步:輸出重點摘錄(Markdown格式)", value="重點摘錄")
|
77 |
file_output2_txt = gr.File(label="第八步:下載重點摘錄(Optional)")
|
78 |
|
79 |
def transcribe_and_download(file, key):
|
80 |
if file is not None:
|
81 |
+
txt_content = transcribe_large_audio(file,key)
|
82 |
txt_path = "transcribe.txt"
|
83 |
with open(txt_path, "w") as txt_file:
|
84 |
txt_file.write(txt_content)
|