Spaces:
Runtime error
Runtime error
File size: 5,022 Bytes
7f3430b c71d159 8527f42 7e66356 8527f42 7e66356 8527f42 92b0167 8527f42 c71d159 92b0167 7e66356 c71d159 7e66356 c71d159 7e66356 c71d159 7e66356 c71d159 7e66356 c71d159 7e66356 8527f42 7e66356 8527f42 7702656 8527f42 7702656 165cb65 8527f42 7e66356 8527f42 7702656 7e66356 8527f42 7e66356 0f95a25 7e66356 165cb65 7e66356 2eda2b6 7e66356 8527f42 0f95a25 8527f42 7e66356 8527f42 7e66356 8527f42 7e66356 28374c4 934e44a 7e66356 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import gradio as gr
import os
import logging
import requests
import tempfile
from langchain_openai import ChatOpenAI
from langchain_community.graphs import Neo4jGraph
import torch
import numpy as np
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
import threading
# Setup Neo4j connection.
# Connection settings are read from the environment (NEO4J_URI, NEO4J_USERNAME,
# NEO4J_PASSWORD) so credentials do not have to live in source control.
# NOTE(security): the literal fallbacks below are kept only so existing
# deployments keep working unchanged. The password has been committed in
# plain text and should be rotated, then supplied via NEO4J_PASSWORD and the
# fallback removed.
graph = Neo4jGraph(
    url=os.environ.get("NEO4J_URI", "neo4j+s://6457770f.databases.neo4j.io"),
    username=os.environ.get("NEO4J_USERNAME", "neo4j"),
    password=os.environ.get(
        "NEO4J_PASSWORD", "Z10duoPkKCtENuOukw3eIlvl0xJWKtrVSr-_hGX1LQ4"
    ),
)
# Escape Lucene query-syntax characters ahead of a Neo4j full-text query
def remove_lucene_chars(input: str) -> str:
    """Return *input* with every Lucene-special character backslash-escaped.

    Despite the name, nothing is removed: each character in the set below
    (which also includes ``;`` and the space character) is replaced by a
    backslash followed by the character itself. All other characters pass
    through unchanged.
    """
    special_chars = '\\+-&|!(){}[]^~*?:"; '
    escape_table = str.maketrans({ch: "\\" + ch for ch in special_chars})
    return input.translate(escape_table)
# Function to generate a full-text query
def generate_full_text_query(input: str) -> str:
    """Build a fuzzy, AND-combined full-text query string from free text.

    The text is split on whitespace; each term is escaped with
    ``remove_lucene_chars`` and suffixed with ``~2``, and the terms are joined
    with ``AND``. For example ``"hello world"`` becomes
    ``"hello~2 AND world~2"``.

    Args:
        input: Raw user text (for example a transcribed question).

    Returns:
        The query string, or ``""`` when *input* contains no terms.
    """
    # Split BEFORE escaping: the escaper also escapes spaces, so escaping the
    # whole string first leaves a stray trailing backslash on every term but
    # the last, which in turn escapes the "~" of the fuzzy suffix.
    terms = input.split()
    if not terms:
        # Empty / whitespace-only input previously raised IndexError.
        return ""
    return " AND ".join(f"{remove_lucene_chars(term)}~2" for term in terms)
# Define the function to query Neo4j and get a response
def get_response(question):
    """Look up the best full-text match for *question* in Neo4j.

    Runs a full-text search against the ``entity`` index and returns the
    ``content`` property of the highest-scoring node.

    Args:
        question: The user's question as text. ``None``, empty, or
            whitespace-only values are treated as "nothing to search for".

    Returns:
        The matched node's content, a "not found" message when there is no
        usable question or no match, or a generic error message when the
        database query raises.
    """
    not_found = "Sorry, I couldn't find any relevant information in the database."

    # A blank question cannot produce a valid query; answer directly instead
    # of letting the query builder or the database fail on it.
    if not question or not question.strip():
        return not_found

    query = generate_full_text_query(question)
    try:
        # Query the Neo4j database using a full-text search
        response = graph.query(
            """
            CALL db.index.fulltext.queryNodes('entity', $query)
            YIELD node, score
            RETURN node.content AS content, score
            ORDER BY score DESC LIMIT 1
            """,
            {"query": query}
        )
    except Exception as e:
        # Top-level boundary for the UI: log with traceback, return a
        # user-facing message rather than propagating.
        logging.exception("Error querying Neo4j: %s", e)
        return "An error occurred while fetching data from the database."

    if not response:
        return not_found
    # Extract the content from the top response
    return response[0]['content']
# Function to generate audio with Eleven Labs TTS
def generate_audio_elevenlabs(text, timeout=30.0):
    """Synthesize *text* to speech via the ElevenLabs streaming endpoint.

    The audio stream is written to a temporary ``.mp3`` file that is NOT
    deleted automatically (``delete=False``); the caller owns the file.

    Args:
        text: Text to speak; converted with ``str()`` before sending.
        timeout: Timeout in seconds passed to ``requests.post``. Without it
            a stalled connection would block the request indefinitely.

    Returns:
        Path of the written ``.mp3`` file, or ``None`` when the API answers
        with a non-success status.

    Raises:
        KeyError: If the ``ELEVENLABS_API`` environment variable is not set.
        requests.RequestException: On network failures or timeouts.
    """
    XI_API_KEY = os.environ['ELEVENLABS_API']
    VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
    headers = {
        "Accept": "application/json",
        "xi-api-key": XI_API_KEY
    }
    data = {
        "text": str(text),
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 1.0,
            "similarity_boost": 0.0,
            "style": 0.60,
            "use_speaker_boost": False
        }
    }
    # With stream=True the connection is only released once the body is fully
    # consumed or the response is closed; the context manager guarantees the
    # latter on every path, including the error return below.
    with requests.post(
        tts_url, headers=headers, json=data, stream=True, timeout=timeout
    ) as response:
        if not response.ok:
            logging.error(
                "ElevenLabs TTS request failed: %s %s",
                response.status_code,
                response.reason,
            )
            return None
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
            audio_path = f.name
    return audio_path
# Speech-to-text setup: load the Whisper checkpoint and wrap it in an ASR pipeline
model_id = 'openai/whisper-large-v3'

# Use half precision on the first CUDA device when one is available,
# otherwise fall back to float32 on the CPU.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype)
model = model.to(device)
processor = AutoProcessor.from_pretrained(model_id)

# The processor supplies both the tokenizer and the feature extractor.
pipe_asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
    max_new_tokens=128,
    chunk_length_s=15,
    batch_size=16,
    return_timestamps=True,
)
# Function to handle voice input, generate response from Neo4j, and return audio output
def handle_voice_to_voice(audio):
    """Transcribe a recording, answer it from Neo4j, and speak the answer.

    Args:
        audio: ``(sample_rate, samples)`` tuple as delivered by the numpy-typed
            audio input, or ``None`` when nothing was recorded.

    Returns:
        Path to the synthesized answer audio, or ``None`` when there is no
        usable recording, nothing was transcribed, or speech synthesis failed.
    """
    # Submitting without a recording hands us None; unpacking it would raise.
    if audio is None:
        return None

    # Transcribe audio input to text
    sr, y = audio
    if y is None or y.size == 0:
        return None

    # Ensure that the audio is in float32 format
    y = y.astype(np.float32)
    if y.ndim > 1:
        # Down-mix multi-channel input to mono.
        # NOTE(review): assumes a (samples, channels) layout, confirm against
        # the Gradio Audio component documentation.
        y = y.mean(axis=1)

    # Normalize audio to range [-1.0, 1.0]; skip for pure silence, where the
    # peak is 0 and dividing would fill the signal with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    # Process the audio data with Whisper ASR
    result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
    question = result.get("text", "")
    if not question.strip():
        logging.warning("Speech recognition returned no text; skipping lookup.")
        return None

    # Get response using the transcribed question
    response = get_response(question)
    # Generate audio from the response
    return generate_audio_elevenlabs(response)
# Build the Gradio UI: record a question, press the button, hear the answer
with gr.Blocks() as demo:
    audio_input = gr.Audio(
        label="Speak to Ask",
        sources=["microphone"],
        type='numpy',
        streaming=False,
    )
    submit_voice_btn = gr.Button("Submit Voice")
    audio_output = gr.Audio(
        label="Response Audio",
        type="filepath",
        interactive=False,
        autoplay=True,
    )

    # Clicking the button sends the recording through the voice-to-voice
    # handler and plays back the returned audio file.
    submit_voice_btn.click(
        handle_voice_to_voice,
        inputs=audio_input,
        outputs=audio_output,
    )

# Start the app with a public share link and errors surfaced in the UI
demo.launch(share=True, show_error=True)
|