#!/usr/bin/env python
# coding: utf-8
# In[29]:
import ipywidgets as widgets
from IPython.display import display, clear_output
from threading import Thread
from queue import Queue
import time

# messages acts as a run flag (non-empty = recording in progress);
# recordings buffers captured audio for the transcription thread.
messages = Queue()
recordings = Queue()
record_button = widgets.Button(
description="Record",
disabled=False,
button_style="success",
icon="microphone"
)
stop_button = widgets.Button(
description="Stop",
disabled=False,
button_style="warning",
icon="stop"
)
output = widgets.Output()
def record_microphone():
    # Placeholder recorder: pretend to capture audio while the run flag is set.
    while not messages.empty():
        time.sleep(1)  # Simulate recording
        recordings.put("Audio recorded.")  # Simulated recorded audio data

def speech_recognition(output_widget):
    # Placeholder transcriber: pretend to transcribe while recording is active.
    while not messages.empty():
        time.sleep(2)  # Simulate transcription
        with output_widget:
            clear_output(wait=True)
            display("Transcription: Hello, how are you?")  # Simulated transcription result
def start_recording(data):
if not messages.empty():
return # Recording already in progress
messages.put(True)
with output:
clear_output(wait=True)
display("Starting...")
record = Thread(target=record_microphone)
record.start()
transcribe = Thread(target=speech_recognition, args=(output,))
transcribe.start()
def stop_recording(data):
if messages.empty():
return # No recording in progress
messages.get()
with output:
clear_output(wait=True)
display("Stopped.")
record_button.on_click(start_recording)
stop_button.on_click(stop_recording)
display(widgets.HBox([record_button, stop_button]), output)
# In[30]:
# PyAudio is required for microphone capture in the cells below.
get_ipython().system('python -m pip install pyaudio')
# In[31]:
import pyaudio

# List every audio device so a valid input_device_index can be chosen below.
p = pyaudio.PyAudio()
for i in range(p.get_device_count()):
    print(p.get_device_info_by_index(i))
p.terminate()
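# In[ ]:

# A minimal sketch (not in the original notebook): pick an input device index
# programmatically instead of hardcoding input_device_index=1 in the capture
# cell below. It relies only on the standard PyAudio device-info fields
# printed above; the name_hint parameter is illustrative.
def find_input_device(name_hint=""):
    p = pyaudio.PyAudio()
    try:
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            if info["maxInputChannels"] > 0 and name_hint.lower() in info["name"].lower():
                return i
        return None
    finally:
        p.terminate()

# Example: find_input_device("microphone") -> index of the first matching input device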
# In[32]:
import pyaudio
from queue import Queue

CHANNELS = 1             # mono capture
FRAME_RATE = 16000       # samples per second; the Vosk model below expects 16 kHz audio
RECORD_SECONDS = 20      # seconds of audio per batch handed to the transcriber
AUDIO_FORMAT = pyaudio.paInt16
SAMPLE_SIZE = 2          # bytes per sample for paInt16

# Fresh queues: messages is the run flag, recordings carries frame batches.
messages = Queue()
recordings = Queue()
def record_microphone(chunk=1024):
    p = pyaudio.PyAudio()
    stream = p.open(format=AUDIO_FORMAT,
                    channels=CHANNELS,
                    rate=FRAME_RATE,
                    input=True,
                    input_device_index=1,  # set this to a device index from the listing above
                    frames_per_buffer=chunk)
    frames = []
    while not messages.empty():
        data = stream.read(chunk)
        frames.append(data)
        # Hand off a batch once RECORD_SECONDS of audio has accumulated.
        if len(frames) >= int(FRAME_RATE * RECORD_SECONDS / chunk):
            recordings.put(frames.copy())
            frames = []
    stream.stop_stream()
    stream.close()
    p.terminate()
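# In[ ]:

# Optional sketch: dump one batch of captured frames to a WAV file for manual
# inspection, reusing the capture constants above. The filename is illustrative.
import wave

def save_frames(frames, path="debug_capture.wav"):
    with wave.open(path, "wb") as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(SAMPLE_SIZE)  # 2 bytes per sample for paInt16
        wf.setframerate(FRAME_RATE)
        wf.writeframes(b''.join(frames))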
# In[33]:
import subprocess
import json
from vosk import Model, KaldiRecognizer

# Load the Vosk model once; the recognizer's sample rate must match the capture rate.
model = Model(model_name="vosk-model-en-us-0.42-gigaspeech")
rec = KaldiRecognizer(model, FRAME_RATE)
rec.SetWords(True)

def speech_recognition(output):
    while not messages.empty():
        frames = recordings.get()
        rec.AcceptWaveform(b''.join(frames))
        result = rec.Result()
        text = json.loads(result)["text"]
        # Restore casing and punctuation with the recasepunc checkpoint.
        cased = subprocess.check_output(
            "python recasepunc/recasepunc.py predict recasepunc/checkpoint",
            shell=True, text=True, input=text)
        output.append_stdout(cased)
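# In[ ]:

# Because start_recording resolves record_microphone and speech_recognition by
# name at call time, the Record/Stop buttons above now drive the real pipeline.
# Below is a headless smoke test of the capture thread alone (a sketch that
# assumes the cells above have all been run). It records a little longer than
# RECORD_SECONDS so at least one batch lands in the queue.
def smoke_test_capture():
    messages.put(True)                 # raise the run flag
    t = Thread(target=record_microphone)
    t.start()
    time.sleep(RECORD_SECONDS + 2)
    messages.get()                     # lower the flag; the thread exits its loop
    t.join()
    if not recordings.empty():
        save_frames(recordings.get())  # helper sketched after the capture cell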
# In[5]:
import gradio as gr
# Define the function you want to expose through Gradio
def my_function(input1, input2):
output = f"Processed {input1} and {input2}"
return output
# Create the Gradio interface
iface = gr.Interface(
fn=my_function,
inputs=[gr.Textbox(label="Input 1"), gr.Textbox(label="Input 2")],
outputs=gr.Textbox(label="Output")
)
# Launch the interface with a public link
iface.launch(share=True)
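# In[ ]:

# Sketch (an extension, not part of the original notebook): serving the Vosk
# transcriber through Gradio instead of the placeholder my_function. With
# gr.Audio(type="filepath"), Gradio passes the recorded or uploaded clip to the
# function as a file path. This assumes a 16-bit mono WAV; other formats would
# need downmixing/resampling first.
import wave

def transcribe_file(audio_path):
    wf = wave.open(audio_path, "rb")
    recognizer = KaldiRecognizer(model, wf.getframerate())
    while True:
        data = wf.readframes(4000)
        if not data:
            break
        recognizer.AcceptWaveform(data)
    wf.close()
    return json.loads(recognizer.FinalResult())["text"]

# iface = gr.Interface(fn=transcribe_file,
#                      inputs=gr.Audio(type="filepath"),
#                      outputs=gr.Textbox(label="Transcript"))
# iface.launch()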
# In[ ]: