"""Gradio web UI that transcribes audio with OpenAI Whisper and post-processes the text.

The interface has two buttons: "Transcribe audio" runs Whisper on the supplied
audio and fills the first textbox; "Process text" runs ``process_text`` on the
contents of the first textbox and fills the second textbox.
"""

import os
from typing import Optional

import gradio as gr
import openai
import whisper

# SECURITY: never commit API keys to source control. The key is read from the
# environment instead; any key that was previously hard-coded in this file
# must be treated as leaked and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Loaded once at import time so every request reuses the same model.
model = whisper.load_model("small")


def transcribe(audio: Optional[str]) -> str:
    """Transcribe an audio file with the module-level Whisper model.

    Args:
        audio: Path to the audio file (the Gradio ``Audio`` component is
            configured with ``type="filepath"``). ``None`` or an empty value
            means no audio was supplied.

    Returns:
        The decoded text, or an empty string when no audio was supplied.
    """
    # Gradio passes None when the user clicks the button without recording or
    # uploading anything; whisper.load_audio would fail on that.
    if not audio:
        return ""

    # Load the audio and pad/trim it to fit 30 seconds.
    samples = whisper.load_audio(audio)
    samples = whisper.pad_or_trim(samples)

    # Make a log-Mel spectrogram and move it to the same device as the model.
    mel = whisper.log_mel_spectrogram(samples).to(model.device)

    # Detect the spoken language (logged for information only).
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode the audio; fp16 is disabled explicitly.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    return result.text


def process_text(input_text: str) -> str:
    """Post-process the transcribed text.

    This example just converts the input text to uppercase; replace the body
    with your desired function.

    Args:
        input_text: Text taken from the first textbox.

    Returns:
        The processed text shown in the second textbox.
    """
    return input_text.upper()


demo = gr.Blocks()

with demo:
    audio = gr.Audio(type="filepath")
    text1 = gr.Textbox()
    text2 = gr.Textbox()
    b1 = gr.Button("Transcribe audio")
    b2 = gr.Button("Process text")

    # First button: audio -> transcription in text1.
    b1.click(transcribe, inputs=audio, outputs=text1)
    # Second button: text1 -> processed text in text2.
    b2.click(process_text, inputs=text1, outputs=text2)

# Only start the web server when run as a script, so importing this module
# (e.g. to reuse `transcribe`) does not block on a running server.
if __name__ == "__main__":
    demo.launch()

# Alternative single-function interface, kept for reference:
#
# gr.Interface(
#     title = 'OpenAI Whisper ASR Gradio Web UI',
#     fn=transcribe,
#     inputs=[
#         gr.inputs.Audio(source="microphone", type="filepath")
#     ],
#     outputs=[
#         "textbox"
#     ],
#     live=True).launch()