#!/usr/bin/env python
# coding: utf-8

# In[29]:

import ipywidgets as widgets
from IPython.display import display, clear_output
from threading import Thread
from queue import Queue
import time

# A non-empty `messages` queue acts as a "recording in progress" flag shared
# by the worker threads; `recordings` carries the captured audio between them.
messages = Queue()
recordings = Queue()

record_button = widgets.Button(
    description="Record",
    disabled=False,
    button_style="success",
    icon="microphone"
)

stop_button = widgets.Button(
    description="Stop",
    disabled=False,
    button_style="warning",
    icon="stop"
)

output = widgets.Output()


def record_microphone():
    # Placeholder recorder: real microphone capture is defined in a later cell.
    while not messages.empty():
        time.sleep(1)  # simulate recording
        recordings.put("Audio recorded.")  # simulated recorded audio data


def speech_recognition(output_widget):
    # Placeholder recognizer: real transcription is defined in a later cell.
    while not messages.empty():
        time.sleep(2)  # simulate transcription
        with output_widget:
            clear_output(wait=True)
            display("Transcription: Hello, how are you?")  # simulated result


def start_recording(data):
    if not messages.empty():
        return  # recording already in progress
    messages.put(True)
    with output:
        clear_output(wait=True)
        display("Starting...")
    record = Thread(target=record_microphone)
    record.start()
    transcribe = Thread(target=speech_recognition, args=(output,))
    transcribe.start()


def stop_recording(data):
    if messages.empty():
        return  # no recording in progress
    messages.get()  # draining the flag makes both worker loops exit
    with output:
        clear_output(wait=True)
        display("Stopped.")


record_button.on_click(start_recording)
stop_button.on_click(stop_recording)

display(widgets.HBox([record_button, stop_button]), output)


# In[30]:

get_ipython().system('python -m pip install pyaudio')


# In[31]:

import pyaudio

# List every audio device so you can pick the right input_device_index
# for the recording cell below.
p = pyaudio.PyAudio()
for i in range(p.get_device_count()):
    print(p.get_device_info_by_index(i))
p.terminate()


# In[32]:

import pyaudio
from queue import Queue

CHANNELS = 1
FRAME_RATE = 16000           # Vosk models expect 16 kHz mono audio
RECORD_SECONDS = 20          # size of each batch handed to the recognizer
AUDIO_FORMAT = pyaudio.paInt16
SAMPLE_SIZE = 2              # bytes per sample for paInt16

messages = Queue()
recordings = Queue()


def record_microphone(chunk=1024):
    p = pyaudio.PyAudio()
    stream = p.open(format=AUDIO_FORMAT,
                    channels=CHANNELS,
                    rate=FRAME_RATE,
                    input=True,
                    input_device_index=1,  # pick this from the device list above
                    frames_per_buffer=chunk)
    frames = []

    while not messages.empty():
        data = stream.read(chunk, exception_on_overflow=False)
        frames.append(data)

        # Hand off a batch once RECORD_SECONDS of audio has accumulated.
        if len(frames) >= int(FRAME_RATE * RECORD_SECONDS / chunk):
            recordings.put(frames.copy())
            frames = []

    stream.stop_stream()
    stream.close()
    p.terminate()


# In[33]:

import subprocess
import json
from queue import Empty
from vosk import Model, KaldiRecognizer

model = Model(model_name="vosk-model-en-us-0.42-gigaspeech")
rec = KaldiRecognizer(model, FRAME_RATE)
rec.SetWords(True)


def speech_recognition(output):
    while not messages.empty():
        try:
            # A timeout keeps this loop from blocking forever on an empty
            # queue after Stop is clicked.
            frames = recordings.get(timeout=1)
        except Empty:
            continue

        rec.AcceptWaveform(b''.join(frames))
        result = rec.Result()
        text = json.loads(result)["text"]

        # recasepunc restores capitalization and punctuation, which Vosk
        # does not produce on its own.
        cased = subprocess.check_output(
            "python recasepunc/recasepunc.py predict recasepunc/checkpoint",
            shell=True, text=True, input=text)
        output.append_stdout(cased)


# In[2]:

def my_function(input1, input2):
    # Process the inputs and generate the output
    output = f"Processed {input1} and {input2}"
    return output


# In[5]:

import gradio as gr


# Define the function you want to expose through Gradio
def my_function(input1, input2):
    output = f"Processed {input1} and {input2}"
    return output


# Create the Gradio interface
iface = gr.Interface(
    fn=my_function,
    inputs=[gr.Textbox(label="Input 1"), gr.Textbox(label="Input 2")],
    outputs=gr.Textbox(label="Output")
)

# Launch the interface with a public link
iface.launch(share=True)
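# In[ ]:

# A hedged helper, not part of the original notebook: rather than hardcoding
# input_device_index=1, look the index up from PyAudio at runtime.
# find_input_device is a hypothetical name introduced here for illustration;
# it only wraps the device listing shown in the earlier cell.

import pyaudio


def find_input_device(name_fragment=None):
    """Return the default input device index, or the index of the first
    input-capable device whose name contains name_fragment."""
    p = pyaudio.PyAudio()
    try:
        if name_fragment is None:
            return int(p.get_default_input_device_info()["index"])
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            if (info.get("maxInputChannels", 0) > 0
                    and name_fragment.lower() in info["name"].lower()):
                return i
        raise ValueError(f"no input device matching {name_fragment!r}")
    finally:
        p.terminate()

# Example: record_microphone could then use
# input_device_index=find_input_device() instead of a hardcoded 1.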
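# In[ ]:

# A minimal sketch, not in the original notebook: write one batch of recorded
# frames to a WAV file with the standard-library wave module, reusing the
# CHANNELS, FRAME_RATE, and SAMPLE_SIZE constants defined above. save_wav is
# a hypothetical helper name.

import wave


def save_wav(frames, path="recording.wav"):
    with wave.open(path, "wb") as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(SAMPLE_SIZE)  # 2 bytes per sample for paInt16
        wf.setframerate(FRAME_RATE)
        wf.writeframes(b"".join(frames))
    return path

# Example: save_wav(recordings.get()) after at least one batch is queued.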
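# In[ ]:

# A hedged sketch of driving the real pipeline without the widget buttons:
# start both worker threads, record for a fixed duration, then stop. Assumes
# the cells defining record_microphone, speech_recognition, messages,
# recordings, and output have already been run; run_for is a hypothetical
# helper.

import time
from threading import Thread


def run_for(seconds=30):
    messages.put(True)  # flag both worker loops to keep running
    record = Thread(target=record_microphone)
    transcribe = Thread(target=speech_recognition, args=(output,))
    record.start()
    transcribe.start()

    time.sleep(seconds)

    messages.get()  # draining the flag stops both loops
    record.join()
    transcribe.join()  # safe because speech_recognition polls with a timeout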
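# In[ ]:

# A hedged sketch combining the two halves of this notebook: exposing the
# Vosk recognizer through Gradio instead of the dummy textbox function.
# transcribe_file is a hypothetical wrapper, and the gr.Audio signature below
# matches Gradio 3.x (source="microphone"); newer releases use
# sources=["microphone"] instead. Assumes the cell that loads `model` has
# run and that the recording is mono 16-bit PCM, which Vosk expects.

import json
import wave
import gradio as gr
from vosk import KaldiRecognizer


def transcribe_file(path):
    if path is None:
        return ""
    with wave.open(path, "rb") as wf:
        local_rec = KaldiRecognizer(model, wf.getframerate())
        local_rec.AcceptWaveform(wf.readframes(wf.getnframes()))
    return json.loads(local_rec.FinalResult())["text"]


demo = gr.Interface(
    fn=transcribe_file,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=gr.Textbox(label="Transcription"),
)
demo.launch(share=True)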