import gradio as gr
import os
import logging
import requests
import tempfile
from langchain_community.graphs import Neo4jGraph
import torch
import numpy as np
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor

# Set up the Neo4j connection
graph = Neo4jGraph(
    url="neo4j+s://6457770f.databases.neo4j.io",
    username="neo4j",
    password="Z10duoPkKCtENuOukw3eIlvl0xJWKtrVSr-_hGX1LQ4"
)
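
# NOTE: credentials are hardcoded here for demo purposes. A safer pattern is to
# read them from the environment (the variable names below are illustrative):
#
#   graph = Neo4jGraph(
#       url=os.environ["NEO4J_URI"],
#       username=os.environ["NEO4J_USERNAME"],
#       password=os.environ["NEO4J_PASSWORD"],
#   )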

# Escape Lucene special characters so raw user input is safe in a full-text query.
# Spaces are deliberately NOT escaped: generate_full_text_query() below splits on
# whitespace, and an escaped space would leave a trailing backslash on each word.
def remove_lucene_chars(input: str) -> str:
    return input.translate(str.maketrans({
        "\\": r"\\", "+": r"\+", "-": r"\-", "&": r"\&", "|": r"\|", "!": r"\!",
        "(": r"\(", ")": r"\)", "{": r"\{", "}": r"\}", "[": r"\[", "]": r"\]",
        "^": r"\^", "~": r"\~", "*": r"\*", "?": r"\?", ":": r"\:", '"': r'\"',
        ";": r"\;"
    }))
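
# For example (illustrative input):
#   remove_lucene_chars('what is "RAG"?')  returns  'what is \\"RAG\\"\\?'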

# Build a fuzzy full-text query: append ~2 to every term and join with AND
def generate_full_text_query(input: str) -> str:
    words = [el for el in remove_lucene_chars(input).split() if el]
    if not words:  # nothing searchable in the input
        return ""
    return " AND ".join(f"{word}~2" for word in words)
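
# Example: generate_full_text_query("grand canyon") -> "grand~2 AND canyon~2".
# The ~2 suffix is Lucene fuzzy matching (up to two character edits per term),
# which helps tolerate small ASR transcription errors.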

# Define the function to query Neo4j and get a response
def get_response(question):
    query = generate_full_text_query(question)
    if not query:  # blank or unintelligible transcription
        return "Sorry, I couldn't make out a question. Please try again."
    try:
        # Query the Neo4j database using a full-text search
        response = graph.query(
            """
            CALL db.index.fulltext.queryNodes('entity', $query)
            YIELD node, score
            RETURN node.content AS content, score
            ORDER BY score DESC LIMIT 1
            """,
            {"query": query}
        )
        # Extract the content from the top response
        if response:
            result = response[0]['content']
            return result
        else:
            return "Sorry, I couldn't find any relevant information in the database."
    except Exception as e:
        logging.error(f"Error querying Neo4j: {e}")
        return "An error occurred while fetching data from the database."

# Function to generate audio with Eleven Labs TTS
def generate_audio_elevenlabs(text):
    XI_API_KEY = os.environ['ELEVENLABS_API']
    VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
    tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
    headers = {
        "Accept": "application/json",
        "xi-api-key": XI_API_KEY
    }
    data = {
        "text": str(text),
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 1.0,
            "similarity_boost": 0.0,
            "style": 0.60,
            "use_speaker_boost": False
        }
    }
    response = requests.post(tts_url, headers=headers, json=data, stream=True)
    if response.ok:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
            audio_path = f.name
        return audio_path
    else:
        logging.error(f"ElevenLabs TTS request failed: {response.status_code} {response.text}")
        return None
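
# Usage sketch: generate_audio_elevenlabs("Hello!") streams the synthesized MP3
# into a NamedTemporaryFile and returns its path (or None on an API error),
# which plugs directly into a gr.Audio(type="filepath") output component.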

# Define ASR model for speech-to-text
model_id = 'openai/whisper-large-v3'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
processor = AutoProcessor.from_pretrained(model_id)

pipe_asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=15,
    batch_size=16,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True
)
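
# Minimal usage sketch for the pipeline above:
#   text = pipe_asr({"array": samples, "sampling_rate": 16000})["text"]
# where `samples` is a mono float32 numpy array; chunk_length_s=15 splits long
# recordings into 15-second windows that are transcribed in batches of 16.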

# Function to handle voice input, generate response from Neo4j, and return audio output
def handle_voice_to_voice(audio):
    if audio is None:  # nothing was recorded
        return None
    sr, y = audio

    # Ensure float32 mono: Gradio can deliver stereo as (samples, channels)
    y = y.astype(np.float32)
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Normalize to [-1.0, 1.0], guarding against division by zero on silence
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    # Transcribe the audio with Whisper ASR
    result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
    question = result.get("text", "").strip()

    # Get response using the transcribed question
    response = get_response(question)

    # Generate audio from the response
    audio_path = generate_audio_elevenlabs(response)
    return audio_path


# Define the Gradio interface
with gr.Blocks() as demo:
    audio_input = gr.Audio(sources=["microphone"], type='numpy', streaming=False, label="Speak to Ask")
    submit_voice_btn = gr.Button("Submit Voice")
    audio_output = gr.Audio(label="Response Audio", type="filepath", autoplay=True, interactive=False)

    # Interactions for Submit Voice Button
    submit_voice_btn.click(
        fn=handle_voice_to_voice,
        inputs=audio_input,
        outputs=audio_output
    )

# Launch the Gradio interface
demo.launch(show_error=True, share=True)
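
# To run locally: export ELEVENLABS_API with a valid ElevenLabs key, then run
# this file with Python (e.g. `python app.py` -- the filename is assumed).
# share=True additionally prints a temporary public *.gradio.live URL.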