Spaces:

acmc
/

grammASRian

Sleeping

grammASRian / app.py

aldan.creo

First version

b99bb69 24 days ago

2.51 kB

	import gradio as gr
	from transformers import pipeline
	import numpy as np

	# Speech-recognition pipeline, built once at import time and reused by
	# every call to transcribe().
	transcriber = pipeline(
	    task="automatic-speech-recognition",
	    model="openai/whisper-base.en",
	)


	def transcribe(state, words_list, new_chunk):
	    """Transcribe the audio streamed so far and count the requested words.

	    Each call appends ``new_chunk`` to the audio accumulated in ``state``,
	    transcribes the *whole* accumulated stream, and counts the occurrences
	    of every word in ``words_list`` in that transcription.

	    Args:
	        state: Per-session dict produced by a previous call, or ``None`` on
	            the first call. Keys used: ``"stream"`` (accumulated samples),
	            ``"full_transcription"`` (str) and ``"counts_of_words"`` (dict).
	        words_list: Comma-separated words or phrases to count.
	        new_chunk: ``(sampling_rate, samples)`` pair, or ``None`` when no
	            audio has been recorded yet. ``samples`` is a NumPy array; when
	            it has more than one dimension the channels are averaged over
	            axis 1.

	    Returns:
	        A ``(state, counts_of_words, full_transcription)`` tuple. When there
	        is no new audio, or transcription fails, the incoming state and its
	        previous counts/transcription are returned unchanged.
	    """
	    import re

	    print(f"state: {state}")

	    if state is None:
	        state = {}

	    stream = state.get("stream")
	    previous_transcription = state.get("full_transcription", "")
	    previous_counts_of_words = state.get("counts_of_words", {})

	    if new_chunk is None:
	        gr.Info("You can start transcribing by clicking on the Record button")
	        print("new chunk is None")
	        return state, previous_counts_of_words, previous_transcription

	    sr, y = new_chunk

	    if isinstance(words_list, str):
	        stripped_words = (word.strip() for word in words_list.split(","))
	        # Drop empty entries: "".split(",") yields [""], and an empty
	        # pattern would match at every position of the text.
	        words_to_check_for = [word for word in stripped_words if word]
	    else:
	        gr.Warning("Please enter a valid list of words to check for")
	        words_to_check_for = []

	    # Convert to mono if stereo
	    if y.ndim > 1:
	        y = y.mean(axis=1)

	    y = y.astype(np.float32)
	    # Peak-normalise, but leave silent or empty chunks alone: dividing by a
	    # zero peak would fill the stream with NaNs.
	    peak = np.max(np.abs(y)) if y.size else 0.0
	    if peak > 0:
	        y /= peak

	    stream = y if stream is None else np.concatenate([stream, y])

	    try:
	        new_transcription = transcriber({"sampling_rate": sr, "raw": stream})
	    except Exception as e:
	        # gr.Error is an exception class and only reaches the user when
	        # raised; a warning surfaces the problem while still returning the
	        # previous results.
	        gr.Warning(f"Transcription failed. Error: {e}")
	        print(f"Transcription failed. Error: {e}")
	        return state, previous_counts_of_words, previous_transcription

	    print(f"new transcription: {new_transcription}")

	    # The whole accumulated stream was transcribed, so this text already is
	    # the full transcription. Appending it to the previous one (or adding
	    # its counts to the previous counts) would repeat earlier audio.
	    full_transcription_text = new_transcription["text"].strip()
	    full_transcription_text_lower = full_transcription_text.lower()

	    # Case-insensitive, whole-word matching so that e.g. "um" is not counted
	    # inside "summer". Lookarounds are used instead of \b so that entries
	    # starting or ending with punctuation still match.
	    new_counts_of_words = {
	        word: len(
	            re.findall(
	                rf"(?<!\w){re.escape(word.lower())}(?!\w)",
	                full_transcription_text_lower,
	            )
	        )
	        for word in words_to_check_for
	    }

	    new_state = {
	        "stream": stream,
	        "full_transcription": full_transcription_text,
	        "counts_of_words": new_counts_of_words,
	    }

	    print(f"new state: {new_state}")

	    return new_state, new_counts_of_words, full_transcription_text


	# Live interface around transcribe(): the "state" input/output pair carries
	# the dict returned by one call into the next, the textbox supplies the
	# words to count, and the microphone audio is streamed in.
	demo = gr.Interface(
	    transcribe,
	    [
	        "state",
	        gr.Textbox(label="List of filler words"),
	        gr.Audio(sources=["microphone"], streaming=True),
	    ],
	    ["state", gr.JSON(label="Filler words count"), gr.Text(label="Transcription")],
	    live=True,
	)

	demo.launch()