File size: 4,760 Bytes
e8e93a8
 
5cfb839
 
e8e93a8
 
 
 
 
 
e514185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5cfb839
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e514185
 
 
 
 
 
 
 
 
 
 
 
5cfb839
e514185
 
 
 
5cfb839
e514185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import subprocess
import sys
import os
from pydub import AudioSegment

# Bootstrap: make sure the OpenAI SDK is importable, installing it on the
# fly if the first import fails. This lets the script run standalone
# without a prior `pip install`. NOTE(review): installing at import time
# is a side effect — confirm this is intended for the deployment target.
try:
    import openai
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "openai"])
    import openai  # Import the library after installing it

def transcribe(filename, key):
  """Transcribe a single audio file with OpenAI's Whisper API.

  Args:
    filename: path to an audio file small enough for the API's upload
      limit (callers split larger files first — see transcribe_large_audio).
    key: OpenAI API key used to build the client.

  Returns:
    The transcript as plain text (``response_format="text"`` makes the
    SDK return a string rather than a JSON object).
  """
  client = openai.OpenAI(api_key=key)
  # BUG FIX: use a context manager so the file handle is closed even when
  # the API call raises; the original opened the file and never closed it.
  with open(filename, "rb") as audio_file:
    transcript_txt = client.audio.transcriptions.create(
      model="whisper-1",
      file=audio_file,
      response_format="text"
    )
  return transcript_txt

def openai_api(prompt, key):
  """Send *prompt* to the gpt-4o chat model and return the reply text.

  Args:
    prompt: the user message to submit as a single-turn conversation.
    key: OpenAI API key (set on the module-level client).

  Returns:
    The assistant's reply content as a string.
  """
  openai.api_key = key
  conversation = [{"role": "user", "content": prompt}]
  response = openai.chat.completions.create(
      model="gpt-4o",
      messages=conversation,
  )
  first_choice = response.choices[0]
  return first_choice.message.content

def transcribe_large_audio(filename, key, segment_length_ms = 30 * 60 *1000):
    """Transcribe an audio file, splitting it first when it exceeds the
    Whisper API's 25 MB upload limit.

    Args:
        filename: path to the source audio file (loaded as MP3 —
            assumes MP3 input; TODO confirm other formats are not passed in).
        key: OpenAI API key, forwarded to transcribe().
        segment_length_ms: chunk duration in milliseconds when splitting
            (default: 30 minutes).

    Returns:
        The concatenated transcript text of all chunks (or of the whole
        file when no split was needed).
    """
    def get_file_size_in_mb(file_path):
        # Size on disk in mebibytes; compared against the 25 MB API limit.
        return os.path.getsize(file_path) / (1024 * 1024)

    def split_audio_file(file_path, segment_length_ms = 30 * 60 *1000):
        # Slice the audio into fixed-length segments, exporting each as a
        # low-bitrate MP3 so every chunk stays under the upload limit.
        # BUG FIX: load from the `file_path` parameter instead of the
        # enclosing `filename`, so the helper works for any path it is given.
        audio = AudioSegment.from_file(file_path, format="mp3")
        segment_filenames = []
        for start in range(0, len(audio), segment_length_ms):
            end = min(start + segment_length_ms, len(audio))
            segment = audio[start:end]
            segment_filename = f"{file_path}_part{len(segment_filenames) + 1}.mp3"
            segment.export(segment_filename, format="mp3", bitrate="36k")
            segment_filenames.append(segment_filename)
        return segment_filenames

    # Kept for backward compatibility with any code relying on the module-level
    # key; transcribe() itself builds a dedicated client from `key`.
    openai.api_key = key

    transcript_txt = ""

    if get_file_size_in_mb(filename) > 25:
        # Split the audio file if it exceeds the chunk size.
        # BUG FIX: forward the caller's segment_length_ms — it was
        # previously ignored and the helper's default always used.
        audio_chunks = split_audio_file(filename, segment_length_ms)
        # Process each chunk separately, accumulating the transcript.
        for chunk_filename in audio_chunks:
            transcript_txt += transcribe(chunk_filename, key)
            # Remove the temporary chunk file once transcribed.
            os.remove(chunk_filename)
    else:
        transcript_txt = transcribe(filename, key)

    return transcript_txt
    
def setup_gradio_interface():
    """Build the Gradio UI: upload audio, transcribe it with Whisper, then
    summarize the transcript with gpt-4o. Returns the (unlaunched) Blocks app.
    """
    with gr.Blocks() as demo:
          # App title (Chinese): "Audio to text, and key-point extraction".
          gr.Markdown("音頻轉文字,並擷取重點")
          with gr.Tab("請依順序操作"):
                  # Row 1: upload file, enter API key, start transcription,
                  # download the transcript.
                  with gr.Row():
                          file_input = gr.File(label="第一步:請上傳檔案")
                          api_key_input = gr.Textbox(label="第二步:請輸入OpenAI API金鑰", placeholder="OpenAI API Key")
                          submit_button = gr.Button("第三步:開始轉譯")
                          file_output_txt = gr.File(label="第四步:下載逐字稿(Optional)")
                  # Row 2: review the transcript, start summarization,
                  # view and download the summary.
                  with gr.Row():                          
                          content = gr.Textbox(label="第五步:檢視轉譯逐字稿", value="轉譯逐字稿")
                          submit2_button = gr.Button("第六步:開始重點摘錄")
                          summary = gr.Textbox(label="第七步:輸出重點摘錄(Markdown格式)", value="重點摘錄")
                          file_output2_txt = gr.File(label="第八步:下載重點摘錄(Optional)")

          def transcribe_and_download(file, key):
                  # Transcribe the uploaded file and also write the text to
                  # disk so Gradio can offer it as a download.
                  # NOTE(review): implicitly returns None when file is None,
                  # which Gradio maps to empty outputs — confirm intended.
                  if file is not None:
                          txt_content = transcribe_large_audio(file,key)
                          txt_path = "transcribe.txt"
                          # NOTE(review): no explicit encoding — relies on the
                          # platform default; confirm UTF-8 on the host.
                          with open(txt_path, "w") as txt_file:
                              txt_file.write(txt_content)
                          return txt_content, txt_path
          def transcribe_and_summary(text, key):
                  # Ask gpt-4o for a key-point summary of the transcript and
                  # save it for download. Same implicit-None caveat as above.
                  if text is not None:
                          # Prompt (Chinese): act as a document expert and
                          # extract key points from the meeting transcript.
                          prompt = "請扮演文書處理專家,幫我把「會議逐字稿」作「重點摘錄」,逐字稿如下:" + text
                          summary = openai_api(prompt, key)
                          txt_path = "summary.txt"
                          with open(txt_path, "w") as txt_file:
                              txt_file.write(summary)
                          return summary, txt_path

          # Wire buttons to handlers: step 3 fills the transcript textbox and
          # download slot; step 6 fills the summary textbox and download slot.
          submit_button.click(
                  transcribe_and_download,
                  inputs=[file_input, api_key_input],
                  outputs=[content, file_output_txt]
          )          
          submit2_button.click(
                  transcribe_and_summary,
                  inputs=[content, api_key_input],
                  outputs=[summary, file_output2_txt]
          )
    return demo

# First, try importing gradio. If it fails, attempt to install it.
try:
    import gradio as gr
except ImportError:
    # BUG FIX: the fallback previously just re-imported gradio, which
    # re-raises the same ImportError. Install it first, mirroring the
    # openai bootstrap at the top of the file.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio"])
    import gradio as gr

# Script entry point: build the interface and launch the local web app.
if __name__ == "__main__":
    demo = setup_gradio_interface()
    demo.launch()