Spaces:

justus-tobias
/

VoiceBot

Sleeping

VoiceBot / app.py

j-tobias

initial commit

e1e27eb 5 months ago

2.35 kB

	import gradio as gr
	# from gradio import ChatMessage
	from transformers import WhisperProcessor, WhisperForConditionalGeneration
	import numpy as np
	import librosa
	import json
	import os


	from huggingface_hub import InferenceClient

	# Access token for the Hugging Face Inference API, taken from the
	# "HF_Token" environment variable (None when the variable is not set).
	hf_token = os.environ.get("HF_Token")

	# Alternative kept for reference: read the token from a local
	# credentials.json file (a JSON object with a "token" key).
	# def get_token():
	#     with open("credentials.json","r") as f:
	#         credentials = json.load(f)
	#     return credentials['token']
	#
	# hf_token = get_token()

	# Remote chat model that produces the assistant's replies.
	client = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct", token=hf_token)




	# Speech-to-text: the processor and the model are loaded from the same
	# Whisper checkpoint.
	_WHISPER_CHECKPOINT = "openai/whisper-large-v2"
	processor = WhisperProcessor.from_pretrained(_WHISPER_CHECKPOINT)
	model = WhisperForConditionalGeneration.from_pretrained(_WHISPER_CHECKPOINT)
	# Clear any forced decoder ids stored in the checkpoint's config.
	model.config.forced_decoder_ids = None

	def chat(audio, chat: list):
	    """Transcribe a voice recording and extend the conversation with a reply.

	    Args:
	        audio: ``(sample_rate, samples)`` tuple as consumed by
	            ``transcribe``, or ``None`` when no recording is available.
	        chat: Conversation so far, a list of message dicts that each have
	            at least ``'role'`` and ``'content'`` keys. Extended in place.

	    Returns:
	        The same ``chat`` list with the transcribed user message and the
	        assistant's reply appended. The list is returned unchanged when
	        there is no audio or the transcription is empty.
	    """
	    # Nothing was recorded: leave the conversation untouched instead of
	    # failing while unpacking ``None`` inside ``transcribe``.
	    if audio is None:
	        return chat

	    transcription = transcribe(audio)
	    if not transcription or not transcription.strip():
	        # Do not send an empty user turn to the model.
	        return chat

	    chat.append({'role': 'user', 'content': transcription})

	    # Forward only the two fields the chat-completion API defines; the
	    # message dicts coming from the UI may carry additional keys.
	    payload = [{'role': msg['role'], 'content': msg['content']} for msg in chat]
	    completion = client.chat_completion(
	        messages=payload,
	        max_tokens=500,
	        stream=False,
	    )
	    reply = completion.choices[0].message.content

	    chat.append({'role': 'assistant', 'content': reply})
	    return chat

	def transcribe(audio):
	    """Convert a recorded audio clip to normalized text with Whisper.

	    Args:
	        audio: ``(sample_rate, samples)`` tuple. ``samples`` is a NumPy
	            array, either 1-D (mono) or 2-D; a 2-D array is assumed to be
	            laid out as ``(n_samples, n_channels)`` - TODO confirm against
	            the audio component in use.

	    Returns:
	        The transcription after the tokenizer's text normalization, or an
	        empty string when the clip contains no samples.
	    """
	    target_sr = 16000  # sampling rate passed to the processor below

	    sr, samples = audio
	    samples = np.asarray(samples)
	    if samples.size == 0:
	        return ""

	    # Integer PCM (e.g. int16) has to be scaled into [-1.0, 1.0); a bare
	    # cast to float32 would keep the raw integer amplitude range.
	    if np.issubdtype(samples.dtype, np.signedinteger):
	        full_scale = np.float32(np.iinfo(samples.dtype).max) + np.float32(1.0)
	        samples = samples.astype(np.float32) / full_scale
	    else:
	        samples = samples.astype(np.float32)

	    # Down-mix multi-channel recordings to mono. Any 2-D input is reduced
	    # along the channel axis so the processor always receives a 1-D signal.
	    if samples.ndim > 1:
	        samples = samples.mean(axis=1)

	    samples = librosa.resample(samples, orig_sr=sr, target_sr=target_sr)
	    input_features = processor(
	        samples, sampling_rate=target_sr, return_tensors="pt"
	    ).input_features
	    predicted_ids = model.generate(input_features)
	    decoded = processor.batch_decode(predicted_ids, skip_special_tokens=False)
	    # Only the first (and only) sequence of the batch is used.
	    return processor.tokenizer.normalize(decoded[0])

	with gr.Blocks() as app:

	    # Conversation display. Its value is also the message history handed to
	    # `chat`, so the seed message below acts as the system prompt.
	    chatbot = gr.Chatbot(
	        value=[{
	            # Chat-completion APIs use the lowercase role name "system".
	            'role': 'system',
	            'content': 'You are a helpfull assitant for an Audio based Chatbot. You are helping Users to order their notes and thoughts.'
	        }],
	        bubble_full_width=False,
	        type="messages"
	    )

	    with gr.Row():

	        # Microphone recorder; the recording is the only user input.
	        audio_input = gr.Audio(
	            sources=['microphone'],
	            interactive=True,
	            scale=8
	        )

	        # mode_option = gr.Radio(
	        #     choices=["online", "local"],
	        #     scale=1
	        # )

	    # Event listener for when the audio recording stops: transcribe the
	    # clip, query the LLM and refresh the chatbot with the new history.
	    audio_input.stop_recording(fn=chat, inputs=[audio_input, chatbot], outputs=chatbot)

	app.launch()