import os
import tempfile

import gradio as gr
import numpy as np
import requests
import torch
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_community.graphs import Neo4jGraph

# Setup Neo4j
graph = Neo4jGraph(
    url="neo4j+s://6457770f.databases.neo4j.io",
    username="neo4j",
    password="Z10duoPkKCtENuOukw3eIlvl0xJWKtrVSr-_hGX1LQ4"
)

# Define a concise prompt template for generating responses
template = """I am a guide for Birmingham, Alabama. I will provide a precise and short response based solely on the provided data. Do not include any additional commentary or context.

Data:
{context}

User's question: {question}

Answer:"""

qa_prompt = ChatPromptTemplate.from_template(template)

# Chat model configuration
chat_model = ChatOpenAI(temperature=0, model_name="gpt-4o", api_key=os.environ['OPENAI_API_KEY'])

# Build a fuzzy full-text query for Neo4j from the user's question
def generate_full_text_query(input: str) -> str:
    return " ".join([f"{word}~2" for word in input.split()])

# Retrieve matching entities from the Neo4j full-text index and format them as context
def retrieve_from_neo4j(question: str) -> str:
    query = generate_full_text_query(question)
    response = graph.query(
        """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
        YIELD node, score
        RETURN node.name AS name, node.description AS description
        LIMIT 5""",
        {"query": query},
    )
    context = "\n".join([f"{el['name']}: {el['description']}" for el in response])
    return context

# Generate the response using the prompt template and Neo4j data
def get_response(question):
    try:
        context = retrieve_from_neo4j(question)
        prompt = qa_prompt.format_prompt(context=context, question=question)
        response = chat_model.invoke(prompt.to_string())
        answer = response.content
        # Filter extraneous content, keeping only the part after "Answer:"
        if "Answer:" in answer:
            answer = answer.split("Answer:")[-1].strip()
        return answer
    except Exception as e:
        return f"Error: {str(e)}"

# Generate audio with the Eleven Labs TTS API
def generate_audio_elevenlabs(text):
    XI_API_KEY = os.environ['ELEVENLABS_API']
    VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
    headers = {"Accept": "application/json", "xi-api-key": XI_API_KEY}
    data = {
        "text": str(text),
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {"stability": 1.0, "similarity_boost": 0.0},
    }
    response = requests.post(tts_url, headers=headers, json=data, stream=True)
    if response.ok:
        # Stream the MP3 response into a temporary file and return its path
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
            audio_path = f.name
        return audio_path
    else:
        return None

# Define the ASR model with Whisper
model_id = 'openai/whisper-large-v3'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
processor = AutoProcessor.from_pretrained(model_id)

pipe_asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=15,
    batch_size=16,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True,
)

# Transcribe the recorded audio, answer the question, and return the spoken response
def transcribe_and_respond(audio):
    if audio is None:
        return None
    sr, y = audio[0], audio[1]
    # Normalize to float32 in [-1, 1] as expected by the Whisper pipeline
    y = y.astype(np.float32)
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y
    result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
    text = result.get("text", "")
    response = get_response(text)
    audio_path = generate_audio_elevenlabs(response)
    return audio_path

with gr.Blocks() as demo:
    audio_input = gr.Audio(sources=["microphone"], streaming=False, type='numpy', label="Speak to Ask")
    audio_output = gr.Audio(label="Audio", type="filepath", autoplay=True, interactive=False)
    audio_input.change(
        fn=transcribe_and_respond,
        inputs=audio_input,
        outputs=audio_output,
    )

# Launch the Gradio interface
demo.launch(show_error=True, share=True)
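
# --- Setup notes (assumptions, not executed by the app) ---
# retrieve_from_neo4j assumes a Neo4j full-text index named 'entity' already exists
# in the target database. A minimal sketch of how such an index might be created in
# Neo4j 5.x, assuming place data is stored as (:Entity {name, description}) nodes
# (the label and property names here are assumptions, not confirmed by this script):
#
#   CREATE FULLTEXT INDEX entity IF NOT EXISTS
#   FOR (n:Entity) ON EACH [n.name, n.description]
#
# The script also expects two environment variables at startup:
#   OPENAI_API_KEY  - used by ChatOpenAI
#   ELEVENLABS_API  - used by generate_audio_elevenlabs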