import os
import tempfile

import gradio as gr
import numpy as np
import requests
import torch
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_community.graphs import Neo4jGraph

# Setup Neo4j
graph = Neo4jGraph(
    url="neo4j+s://6457770f.databases.neo4j.io",
    username="neo4j",
    password="Z10duoPkKCtENuOukw3eIlvl0xJWKtrVSr-_hGX1LQ4"
)

# Define a concise prompt template for generating responses
template = """I am a guide for Birmingham, Alabama. I will provide a precise and short response based solely on the provided data. Do not include any additional commentary or context.

Data:
{context}

User's question: {question}

Answer:"""

qa_prompt = ChatPromptTemplate.from_template(template)

# Chat model configuration
chat_model = ChatOpenAI(temperature=0, model_name="gpt-4o", api_key=os.environ['OPENAI_API_KEY'])

# Build a fuzzy full-text query for Neo4j from the user's question
def generate_full_text_query(input: str) -> str:
    return " ".join([f"{word}~2" for word in input.split()])

# Retrieve matching entities from the Neo4j full-text index and format them as context
def retrieve_from_neo4j(question: str) -> str:
    query = generate_full_text_query(question)
    response = graph.query(
        """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
        YIELD node, score
        RETURN node.name AS name, node.description AS description
        LIMIT 5""",
        {"query": query},
    )
    context = "\n".join([f"{el['name']}: {el['description']}" for el in response])
    return context

# Generate the response using the prompt template and Neo4j data
def get_response(question):
    try:
        context = retrieve_from_neo4j(question)
        prompt = qa_prompt.format_prompt(context=context, question=question)
        response = chat_model.invoke(prompt.to_string())
        answer = response.content
        # Filter extraneous content, keeping only the part after "Answer:"
        if "Answer:" in answer:
            answer = answer.split("Answer:")[-1].strip()
        return answer
    except Exception as e:
        return f"Error: {str(e)}"

# Generate audio with the Eleven Labs TTS API
def generate_audio_elevenlabs(text):
    XI_API_KEY = os.environ['ELEVENLABS_API']
    VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
    headers = {"Accept": "application/json", "xi-api-key": XI_API_KEY}
    data = {
        "text": str(text),
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {"stability": 1.0, "similarity_boost": 0.0},
    }
    response = requests.post(tts_url, headers=headers, json=data, stream=True)
    if response.ok:
        # Stream the MP3 response into a temporary file and return its path
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
            audio_path = f.name
        return audio_path
    else:
        return None

# Define the ASR model with Whisper
model_id = 'openai/whisper-large-v3'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
processor = AutoProcessor.from_pretrained(model_id)

pipe_asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=15,
    batch_size=16,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True,
)

# Transcribe the recorded audio, answer the question, and return the spoken response
def transcribe_and_respond(audio):
    if audio is None:
        return None
    sr, y = audio[0], audio[1]
    # Normalize to float32 in [-1, 1] as expected by the Whisper pipeline
    y = y.astype(np.float32)
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y
    result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
    text = result.get("text", "")
    response = get_response(text)
    audio_path = generate_audio_elevenlabs(response)
    return audio_path

with gr.Blocks() as demo:
    audio_input = gr.Audio(sources=["microphone"], streaming=False, type='numpy', label="Speak to Ask")
    audio_output = gr.Audio(label="Audio", type="filepath", autoplay=True, interactive=False)
    audio_input.change(
        fn=transcribe_and_respond,
        inputs=audio_input,
        outputs=audio_output,
    )

# Launch the Gradio interface
demo.launch(show_error=True, share=True)
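
# --- Setup notes (assumptions, not executed by the app) ---
# retrieve_from_neo4j assumes a Neo4j full-text index named 'entity' already exists
# in the target database. A minimal sketch of how such an index might be created in
# Neo4j 5.x, assuming place data is stored as (:Entity {name, description}) nodes
# (the label and property names here are assumptions, not confirmed by this script):
#
#   CREATE FULLTEXT INDEX entity IF NOT EXISTS
#   FOR (n:Entity) ON EACH [n.name, n.description]
#
# The script also expects two environment variables at startup:
#   OPENAI_API_KEY  - used by ChatOpenAI
#   ELEVENLABS_API  - used by generate_audio_elevenlabs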