# voice_to_text_system / voice_to_text_systemdev -checkpoint-checkpoint.py
# samarthsrivastava's picture
# Upload folder using huggingface_hub
# a46e1df verified
# raw
# history blame
# 4.09 kB
#!/usr/bin/env python
# coding: utf-8
# In[29]:
import ipywidgets as widgets
from IPython.display import display, clear_output
from threading import Thread
from queue import Queue
import time
# Shared state for the UI callbacks and worker threads in this cell.
# `messages` acts as an on/off flag: non-empty means a session is running.
messages = Queue()
# Filled by the recording worker.
recordings = Queue()

# Green "Record" button with a microphone icon.
record_button = widgets.Button(
    description="Record",
    disabled=False,
    button_style="success",
    icon="microphone",
)

# Orange "Stop" button with a stop icon.
stop_button = widgets.Button(
    description="Stop",
    disabled=False,
    button_style="warning",
    icon="stop",
)

# Output area where the callbacks and workers render their status text.
output = widgets.Output()
def record_microphone():
    """Simulate microphone capture for as long as a session is active.

    Loops until the ``messages`` queue is empty. Each pass sleeps for one
    second and then pushes a placeholder string onto ``recordings``.
    """
    while True:
        if messages.empty():
            break
        time.sleep(1)  # Simulate recording
        recordings.put("Audio recorded.")  # Simulated recorded audio data
def speech_recognition(output_widget):
    """Simulate transcription and render a canned result into *output_widget*.

    Loops until the ``messages`` queue is empty. Each pass sleeps for two
    seconds, then replaces the widget's content with a fixed string.
    """
    while True:
        if messages.empty():
            return
        time.sleep(2)  # Simulate transcription
        with output_widget:
            # wait=True defers the clear until the next display call.
            clear_output(wait=True)
            display("Transcription: Hello, how are you?")  # Simulated transcription result
def start_recording(data):
    """Click handler: start a session and launch the two worker threads.

    Does nothing when ``messages`` is already non-empty (a session is in
    progress). ``data`` is the argument supplied by the click callback and
    is not used.
    """
    already_running = not messages.empty()
    if already_running:
        return  # Recording already in progress

    # Raise the "active" flag before the workers start polling it.
    messages.put(True)

    with output:
        clear_output(wait=True)
        display("Starting...")

    recorder = Thread(target=record_microphone)
    recorder.start()
    transcriber = Thread(target=speech_recognition, args=(output,))
    transcriber.start()
def stop_recording(data):
    """Click handler: end the active session, if there is one.

    Removes one item from ``messages`` (the workers poll that queue and
    exit once it is empty) and shows a status line. Does nothing when no
    session is active. ``data`` is the callback argument and is not used.
    """
    if not messages.empty():
        messages.get()
        with output:
            clear_output(wait=True)
            display("Stopped.")
# Attach each button to its click handler, in the same order as before.
for _button, _handler in (
    (record_button, start_recording),
    (stop_button, stop_recording),
):
    _button.on_click(_handler)

# Show both buttons side by side, with the output area below them.
_button_row = widgets.HBox([record_button, stop_button])
display(_button_row, output)
# In[30]:
# Install PyAudio with the %pip magic so the package lands in the environment
# of the running kernel; shelling out to `python -m pip` uses whichever
# `python` is first on PATH, which is not necessarily the kernel's interpreter.
get_ipython().run_line_magic('pip', 'install pyaudio')
# In[31]:
import pyaudio
# Print the info record of every audio device PyAudio reports.
# NOTE(review): presumably used to pick the input device index for recording.
p = pyaudio.PyAudio()
try:
    for i in range(p.get_device_count()):
        print(p.get_device_info_by_index(i))
finally:
    # Always release the PyAudio instance, even if a device query fails.
    p.terminate()
# In[32]:
import pyaudio
from queue import Queue
# Capture settings passed to the microphone stream.
AUDIO_FORMAT = pyaudio.paInt16
CHANNELS = 1
FRAME_RATE = 16000

# Length, in seconds, of each batch of frames handed to the recognizer.
RECORD_SECONDS = 20

# NOTE(review): presumably bytes per sample for paInt16; not read by the
# code visible in this file.
SAMPLE_SIZE = 2

# Fresh queues: `messages` is the run/stop flag, `recordings` carries
# batches of captured frames from the recorder to the recognizer.
messages, recordings = Queue(), Queue()
def record_microphone(chunk=1024, device_index=1):
    """Capture microphone audio in batches while a session is active.

    Reads ``chunk``-frame buffers from the input device for as long as the
    ``messages`` queue is non-empty. Whenever roughly ``RECORD_SECONDS`` of
    audio has accumulated, the list of raw buffers is pushed onto
    ``recordings`` and a new batch is started. Frames collected after the
    last full batch are discarded when the session stops.

    Args:
        chunk: Number of frames per buffer read from the stream.
        device_index: Index of the input device to open. Defaults to 1,
            the value previously hard-coded here; per the PyAudio docs,
            ``None`` selects the system default input device.
    """
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=AUDIO_FORMAT,
                        channels=CHANNELS,
                        rate=FRAME_RATE,
                        input=True,
                        input_device_index=device_index,
                        frames_per_buffer=chunk)
        try:
            # Loop-invariant: how many buffers make up one batch.
            frames_per_batch = int(FRAME_RATE * RECORD_SECONDS / chunk)
            frames = []
            while not messages.empty():
                frames.append(stream.read(chunk))
                if len(frames) >= frames_per_batch:
                    # Hand the finished batch over and start a fresh list,
                    # so the queued list is never mutated afterwards.
                    recordings.put(frames)
                    frames = []
        finally:
            # Release the stream even if a read fails mid-session.
            stream.stop_stream()
            stream.close()
    finally:
        # Release PortAudio even if the stream could not be opened.
        p.terminate()
# In[33]:
import subprocess
import json
from vosk import Model, KaldiRecognizer
# Build the speech model by name and the recognizer that the transcription
# worker uses.
model = Model(model_name="vosk-model-en-us-0.42-gigaspeech")
# The recognizer is given the same FRAME_RATE the microphone stream is opened with.
rec = KaldiRecognizer(model, FRAME_RATE)
# NOTE(review): presumably asks the recognizer to include per-word detail in
# its results — confirm against the Vosk documentation.
rec.SetWords(True)
def speech_recognition(output):
    """Transcribe recorded batches and append the text to *output*.

    While the ``messages`` queue is non-empty, takes a batch of raw audio
    buffers from ``recordings``, feeds it to the module-level recognizer
    ``rec``, pipes the recognized text through the recasepunc script, and
    appends that script's output to the widget.

    Args:
        output: Object exposing ``append_stdout(str)``, which receives the
            text produced by recasepunc.

    Raises:
        subprocess.CalledProcessError: If the recasepunc process exits with
            a non-zero status.
    """
    from queue import Empty  # local import keeps this block self-contained

    # Argument list instead of a shell string: no shell is involved.
    recase_cmd = ["python", "recasepunc/recasepunc.py", "predict", "recasepunc/checkpoint"]

    while not messages.empty():
        try:
            # Wait with a timeout so the loop re-checks the stop flag; a bare
            # get() would block forever once recording has stopped.
            frames = recordings.get(timeout=1)
        except Empty:
            continue

        rec.AcceptWaveform(b"".join(frames))
        text = json.loads(rec.Result())["text"]
        if not text:
            # Nothing recognized in this batch; skip the external process.
            continue

        cased = subprocess.check_output(recase_cmd, text=True, input=text)
        output.append_stdout(cased)
# In[2]:
def my_function(input1: object, input2: object) -> str:
    """Return a message naming both inputs.

    Args:
        input1: First value; formatted with ``str()`` semantics.
        input2: Second value; formatted with ``str()`` semantics.

    Returns:
        The string ``"Processed <input1> and <input2>"``.
    """
    # Return directly rather than via a local named `output`, which would
    # shadow the module-level output widget of the same name.
    return f"Processed {input1} and {input2}"
# In[5]:
import gradio as gr
# Define the function you want to expose through Gradio
def my_function(input1: object, input2: object) -> str:
    """Return a message naming both inputs (the callback given to Gradio).

    Args:
        input1: Value of the first input; formatted with ``str()`` semantics.
        input2: Value of the second input; formatted with ``str()`` semantics.

    Returns:
        The string ``"Processed <input1> and <input2>"``.
    """
    # Return directly rather than via a local named `output`, which would
    # shadow the module-level output widget of the same name.
    return f"Processed {input1} and {input2}"
# Build the Gradio UI: two labelled text inputs feed my_function and a single
# labelled text box shows its return value.
iface = gr.Interface(
    fn=my_function,
    inputs=[
        gr.Textbox(label="Input 1"),
        gr.Textbox(label="Input 2"),
    ],
    outputs=gr.Textbox(label="Output"),
)

# Start the app; share=True requests a public link in addition to the local one.
iface.launch(share=True)
# In[ ]: