Spaces:
Sleeping
Sleeping
import gradio as gr | |
# from gradio import ChatMessage | |
from transformers import WhisperProcessor, WhisperForConditionalGeneration | |
import numpy as np | |
import librosa | |
import json | |
import os | |
from huggingface_hub import InferenceClient | |
# Checkpoint shared by the Whisper processor and the Whisper model below.
_WHISPER_CHECKPOINT = "openai/whisper-large-v2"

# Hugging Face access token, read from the "HF_Token" environment variable
# (None when the variable is not set).
hf_token = os.environ.get("HF_Token")

# Alternative for local runs: read the token from a credentials file.
# def get_token():
#     with open("credentials.json", "r") as f:
#         credentials = json.load(f)
#     return credentials['token']
# hf_token = get_token()

# Remote client used to generate the assistant's replies.
client = InferenceClient(
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    token=hf_token,
)

# Local speech-to-text pipeline: the processor handles feature extraction and
# decoding, the model produces the token ids.
processor = WhisperProcessor.from_pretrained(_WHISPER_CHECKPOINT)
model = WhisperForConditionalGeneration.from_pretrained(_WHISPER_CHECKPOINT)

# Clear any forced decoder prompt ids stored in the checkpoint config.
model.config.forced_decoder_ids = None
def chat(audio, chat: list):
    """Transcribe a recording, send the conversation to the LLM, and return it.

    Args:
        audio: The value of the audio input component, passed on to
            ``transcribe``; ``None`` when no recording is available.
        chat: Conversation history as a list of ``{'role', 'content'}``
            dicts. It is extended in place with the new user turn and the
            assistant's reply.

    Returns:
        The same ``chat`` list. It is returned unchanged when there is no
        audio or when the transcription is empty.
    """
    # Nothing was recorded: leave the conversation as it is instead of
    # failing while unpacking the audio value.
    if audio is None:
        return chat

    transcription = transcribe(audio)
    # Do not spend an LLM call on an empty user message.
    if not transcription.strip():
        return chat

    chat.append({'role': 'user', 'content': transcription})

    # Forward only the fields the chat-completion endpoint needs, with
    # lower-case role names. NOTE(review): the UI history may carry extra
    # keys or differently-cased roles -- confirm against the Gradio version
    # in use.
    payload = [
        {'role': str(turn['role']).lower(), 'content': turn['content']}
        for turn in chat
    ]
    completion = client.chat_completion(
        messages=payload,
        max_tokens=500,
        stream=False,
    )
    response = completion.choices[0].message.content

    chat.append({'role': 'assistant', 'content': response})
    return chat
def transcribe(audio):
    """Convert a recorded clip into normalized text with Whisper.

    Args:
        audio: A ``(sample_rate, samples)`` pair. ``samples`` is a NumPy
            array, either 1-D (mono) or 2-D.
            NOTE(review): 2-D input is assumed to be laid out as
            ``(samples, channels)`` -- confirm against the audio component.

    Returns:
        The transcription after the tokenizer's text normalization, or an
        empty string when the clip contains no samples.
    """
    target_sr = 16000  # sampling rate passed to the Whisper processor below

    sr, samples = audio
    samples = np.asarray(samples)
    if samples.size == 0:
        return ""

    # Integer PCM must be scaled into [-1, 1]; a bare float cast keeps the
    # raw integer magnitudes. NOTE(review): assumes signed PCM.
    pcm_peak = None
    if np.issubdtype(samples.dtype, np.integer):
        pcm_peak = float(np.iinfo(samples.dtype).max)
    samples = samples.astype(np.float32)
    if pcm_peak is not None:
        samples /= pcm_peak

    # Down-mix multi-channel audio. A multi-channel clip is 2-D, so the
    # check has to be "more than one dimension".
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    if sr != target_sr:
        samples = librosa.resample(samples, orig_sr=sr, target_sr=target_sr)

    input_features = processor(
        samples, sampling_rate=target_sr, return_tensors="pt"
    ).input_features
    predicted_ids = model.generate(input_features)
    # Drop special tokens at decode time rather than relying on the text
    # normalizer to remove them afterwards.
    decoded = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    return processor.tokenizer.normalize(decoded[0])
# UI: a chat window seeded with the system prompt, plus a microphone input
# that triggers one transcription + LLM round trip per recording.
with gr.Blocks() as app:
    chatbot = gr.Chatbot(
        value=[{
            # Chat-completion message roles are lower-case.
            'role': 'system',
            'content': 'You are a helpfull assitant for an Audio based Chatbot. You are helping Users to order their notes and thoughts.'
        }],
        bubble_full_width=False,
        type="messages"
    )
    with gr.Row():
        audio_input = gr.Audio(
            sources=['microphone'],
            interactive=True,
            scale=8
        )
        # mode_option = gr.Radio(
        #     choices=["online", "local"],
        #     scale=1
        # )
    # Event listener for when the audio recording stops: the recording and
    # the current history go to `chat`, whose return value replaces the
    # chatbot's content.
    audio_input.stop_recording(fn=chat, inputs=[audio_input, chatbot], outputs=chatbot)

app.launch()