"""Audio chatbot: records speech, transcribes it with Whisper large-v2,
and replies via the hosted Meta-Llama-3-8B-Instruct chat-completion API."""

import os

import gradio as gr
import librosa
import numpy as np
from huggingface_hub import InferenceClient
from transformers import WhisperForConditionalGeneration, WhisperProcessor

# Hugging Face access token, read from the environment (never hard-coded).
hf_token = os.getenv("HF_Token")

# Hosted LLM used to generate the assistant's replies.
client = InferenceClient(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    token=hf_token,
)

# Whisper large-v2 for speech-to-text.
processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
# Let generate() choose task/language instead of forcing decoder ids.
model.config.forced_decoder_ids = None


def chat(audio, chat: list):
    """Transcribe one audio clip, append it as a user turn, and fetch a reply.

    Parameters
    ----------
    audio : tuple[int, np.ndarray]
        ``(sample_rate, samples)`` pair as produced by ``gr.Audio``.
    chat : list
        Chat history as OpenAI-style message dicts (``role``/``content``).
        Mutated in place.

    Returns
    -------
    list
        The updated history including the new user and assistant turns.
    """
    transcription = transcribe(audio)
    chat.append({"role": "user", "content": transcription})
    response = client.chat_completion(
        messages=chat,
        max_tokens=500,
        stream=False,
    ).choices[0].message.content
    chat.append({"role": "assistant", "content": response})
    return chat


def transcribe(audio):
    """Convert a ``(sample_rate, samples)`` pair to normalized text.

    Parameters
    ----------
    audio : tuple[int, np.ndarray]
        Raw recording from ``gr.Audio``: the sample rate and a PCM array,
        either 1-D mono or 2-D ``(n_samples, n_channels)``.

    Returns
    -------
    str
        The transcription, lower-cased/cleaned by the Whisper normalizer.
    """
    sr, samples = audio
    # Gradio delivers integer PCM (typically int16); Whisper/librosa expect
    # float samples in [-1, 1], so scale by the dtype's full range.
    if np.issubdtype(samples.dtype, np.integer):
        samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
    else:
        samples = samples.astype(np.float32)
    # Down-mix multi-channel (n_samples, n_channels) recordings to mono.
    # (The original checked ndim > 2, which a 2-D stereo array never hits.)
    if samples.ndim > 1:
        samples = np.mean(samples, axis=1)
    # Whisper requires 16 kHz input; skip the resample when already there.
    if sr != 16000:
        samples = librosa.resample(samples, orig_sr=sr, target_sr=16000)
    input_features = processor(
        samples, sampling_rate=16000, return_tensors="pt"
    ).input_features
    predicted_ids = model.generate(input_features)
    # skip_special_tokens=True drops markers like <|startoftranscript|>
    # that would otherwise pollute the chat history.
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    return processor.tokenizer.normalize(transcription[0])


with gr.Blocks() as app:
    chatbot = gr.Chatbot(
        value=[{
            # Roles must be lowercase for the chat-completion API.
            "role": "system",
            "content": (
                "You are a helpful assistant for an Audio based Chatbot. "
                "You are helping Users to order their notes and thoughts."
            ),
        }],
        bubble_full_width=False,
        type="messages",
    )
    with gr.Row():
        audio_input = gr.Audio(
            sources=["microphone"],
            interactive=True,
            scale=8,
        )
    # When the user stops recording, transcribe and extend the conversation.
    audio_input.stop_recording(
        fn=chat,
        inputs=[audio_input, chatbot],
        outputs=chatbot,
    )

app.launch()