File size: 4,760 Bytes
e8e93a8
 
5cfb839
 
e8e93a8
 
 
 
 
 
e514185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5cfb839
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e514185
 
 
 
 
 
 
 
 
 
 
 
5cfb839
e514185
 
 
 
5cfb839
e514185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import subprocess
import sys
import os
from pydub import AudioSegment

# Bootstrap: make sure the OpenAI SDK is importable, installing it on the
# fly if the first import fails. This lets the script run standalone
# without a prior `pip install`. NOTE(review): installing at import time
# is a side effect — confirm this is intended for the deployment target.
try:
    import openai
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "openai"])
    import openai  # Import the library after installing it

def transcribe(filename, key):
  """Transcribe a single audio file with OpenAI's Whisper API.

  Args:
    filename: path to an audio file small enough for the API's upload
      limit (callers split larger files first — see transcribe_large_audio).
    key: OpenAI API key used to build the client.

  Returns:
    The transcript as plain text (``response_format="text"`` makes the
    SDK return a string rather than a JSON object).
  """
  client = openai.OpenAI(api_key=key)
  # BUG FIX: use a context manager so the file handle is closed even when
  # the API call raises; the original opened the file and never closed it.
  with open(filename, "rb") as audio_file:
    transcript_txt = client.audio.transcriptions.create(
      model="whisper-1",
      file=audio_file,
      response_format="text"
    )
  return transcript_txt

def openai_api(prompt, key):
  """Send *prompt* to the gpt-4o chat model and return the reply text.

  Args:
    prompt: the user message to submit as a single-turn conversation.
    key: OpenAI API key (set on the module-level client).

  Returns:
    The assistant's reply content as a string.
  """
  openai.api_key = key
  conversation = [{"role": "user", "content": prompt}]
  response = openai.chat.completions.create(
      model="gpt-4o",
      messages=conversation,
  )
  first_choice = response.choices[0]
  return first_choice.message.content

def transcribe_large_audio(filename, key, segment_length_ms = 30 * 60 *1000):
    """Transcribe an audio file, splitting it first when it exceeds the
    Whisper API's 25 MB upload limit.

    Args:
        filename: path to the source audio file (loaded as MP3 —
            assumes MP3 input; TODO confirm other formats are not passed in).
        key: OpenAI API key, forwarded to transcribe().
        segment_length_ms: chunk duration in milliseconds when splitting
            (default: 30 minutes).

    Returns:
        The concatenated transcript text of all chunks (or of the whole
        file when no split was needed).
    """
    def get_file_size_in_mb(file_path):
        # Size on disk in mebibytes; compared against the 25 MB API limit.
        return os.path.getsize(file_path) / (1024 * 1024)

    def split_audio_file(file_path, segment_length_ms = 30 * 60 *1000):
        # Slice the audio into fixed-length segments, exporting each as a
        # low-bitrate MP3 so every chunk stays under the upload limit.
        # BUG FIX: load from the `file_path` parameter instead of the
        # enclosing `filename`, so the helper works for any path it is given.
        audio = AudioSegment.from_file(file_path, format="mp3")
        segment_filenames = []
        for start in range(0, len(audio), segment_length_ms):
            end = min(start + segment_length_ms, len(audio))
            segment = audio[start:end]
            segment_filename = f"{file_path}_part{len(segment_filenames) + 1}.mp3"
            segment.export(segment_filename, format="mp3", bitrate="36k")
            segment_filenames.append(segment_filename)
        return segment_filenames

    # Kept for backward compatibility with any code relying on the module-level
    # key; transcribe() itself builds a dedicated client from `key`.
    openai.api_key = key

    transcript_txt = ""

    if get_file_size_in_mb(filename) > 25:
        # Split the audio file if it exceeds the chunk size.
        # BUG FIX: forward the caller's segment_length_ms — it was
        # previously ignored and the helper's default always used.
        audio_chunks = split_audio_file(filename, segment_length_ms)
        # Process each chunk separately, accumulating the transcript.
        for chunk_filename in audio_chunks:
            transcript_txt += transcribe(chunk_filename, key)
            # Remove the temporary chunk file once transcribed.
            os.remove(chunk_filename)
    else:
        transcript_txt = transcribe(filename, key)

    return transcript_txt
    
def setup_gradio_interface():
    """Build the Gradio UI: upload audio, transcribe it with Whisper, then
    summarize the transcript with gpt-4o. Returns the (unlaunched) Blocks app.
    """
    with gr.Blocks() as demo:
          # App title (Chinese): "Audio to text, and key-point extraction".
          gr.Markdown("音頻轉文字,並擷取重點")
          with gr.Tab("請依順序操作"):
                  # Row 1: upload file, enter API key, start transcription,
                  # download the transcript.
                  with gr.Row():
                          file_input = gr.File(label="第一步:請上傳檔案")
                          api_key_input = gr.Textbox(label="第二步:請輸入OpenAI API金鑰", placeholder="OpenAI API Key")
                          submit_button = gr.Button("第三步:開始轉譯")
                          file_output_txt = gr.File(label="第四步:下載逐字稿(Optional)")
                  # Row 2: review the transcript, start summarization,
                  # view and download the summary.
                  with gr.Row():                          
                          content = gr.Textbox(label="第五步:檢視轉譯逐字稿", value="轉譯逐字稿")
                          submit2_button = gr.Button("第六步:開始重點摘錄")
                          summary = gr.Textbox(label="第七步:輸出重點摘錄(Markdown格式)", value="重點摘錄")
                          file_output2_txt = gr.File(label="第八步:下載重點摘錄(Optional)")

          def transcribe_and_download(file, key):
                  # Transcribe the uploaded file and also write the text to
                  # disk so Gradio can offer it as a download.
                  # NOTE(review): implicitly returns None when file is None,
                  # which Gradio maps to empty outputs — confirm intended.
                  if file is not None:
                          txt_content = transcribe_large_audio(file,key)
                          txt_path = "transcribe.txt"
                          # NOTE(review): no explicit encoding — relies on the
                          # platform default; confirm UTF-8 on the host.
                          with open(txt_path, "w") as txt_file:
                              txt_file.write(txt_content)
                          return txt_content, txt_path
          def transcribe_and_summary(text, key):
                  # Ask gpt-4o for a key-point summary of the transcript and
                  # save it for download. Same implicit-None caveat as above.
                  if text is not None:
                          # Prompt (Chinese): act as a document expert and
                          # extract key points from the meeting transcript.
                          prompt = "請扮演文書處理專家,幫我把「會議逐字稿」作「重點摘錄」,逐字稿如下:" + text
                          summary = openai_api(prompt, key)
                          txt_path = "summary.txt"
                          with open(txt_path, "w") as txt_file:
                              txt_file.write(summary)
                          return summary, txt_path

          # Wire buttons to handlers: step 3 fills the transcript textbox and
          # download slot; step 6 fills the summary textbox and download slot.
          submit_button.click(
                  transcribe_and_download,
                  inputs=[file_input, api_key_input],
                  outputs=[content, file_output_txt]
          )          
          submit2_button.click(
                  transcribe_and_summary,
                  inputs=[content, api_key_input],
                  outputs=[summary, file_output2_txt]
          )
    return demo

# First, try importing gradio. If it fails, attempt to install it.
try:
    import gradio as gr
except ImportError:
    # BUG FIX: the fallback previously just re-imported gradio, which
    # re-raises the same ImportError. Install it first, mirroring the
    # openai bootstrap at the top of the file.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio"])
    import gradio as gr

# Script entry point: build the interface and launch the local web app.
if __name__ == "__main__":
    demo = setup_gradio_interface()
    demo.launch()