Pijush2023 committed on
Commit f0bef0b · verified · 1 Parent(s): d8595c8

Update app.py

Files changed (1)
  1. app.py +48 -186
app.py CHANGED
@@ -28,156 +28,63 @@ import numpy as np
  import threading
 
 
- # Setup Neo4j connection
+ # Setup Neo4j
  graph = Neo4jGraph(
      url="neo4j+s://6457770f.databases.neo4j.io",
      username="neo4j",
      password="Z10duoPkKCtENuOukw3eIlvl0xJWKtrVSr-_hGX1LQ4"
  )
 
- # Define entity extraction and retrieval functions
- class Entities(BaseModel):
-     names: List[str] = Field(
-         ..., description="All the person, organization, or business entities that appear in the text"
-     )
-
- entity_prompt = ChatPromptTemplate.from_messages([
-     ("system", "You are extracting organization and person entities from the text."),
-     ("human", "Use the given format to extract information from the following input: {question}"),
- ])
-
- chat_model = ChatOpenAI(temperature=0, model_name="gpt-4o", api_key=os.environ['OPENAI_API_KEY'])
- entity_chain = entity_prompt | chat_model.with_structured_output(Entities)
-
- def remove_lucene_chars(input: str) -> str:
-     return input.translate(str.maketrans({
-         "\\": r"\\", "+": r"\+", "-": r"\-", "&": r"\&", "|": r"\|", "!": r"\!",
-         "(": r"\(", ")": r"\)", "{": r"\{", "}": r"\}", "[": r"\[", "]": r"\]",
-         "^": r"\^", "~": r"\~", "*": r"\*", "?": r"\?", ":": r"\:", '"': r'\"',
-         ";": r"\;", " ": r"\ "
-     }))
-
- def generate_full_text_query(input: str) -> str:
-     full_text_query = ""
-     words = [el for el in remove_lucene_chars(input).split() if el]
-     for word in words[:-1]:
-         full_text_query += f" {word}~2 AND"
-     full_text_query += f" {words[-1]}~2"
-     return full_text_query.strip()
-
- # Setup logging to a file to capture debug information
- logging.basicConfig(filename='neo4j_retrieval.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
-
- def structured_retriever(question: str) -> str:
-     result = ""
-     entities = entity_chain.invoke({"question": question})
-     for entity in entities.names:
-         response = graph.query(
-             """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
-             YIELD node,score
-             CALL {
-               WITH node
-               MATCH (node)-[r:!MENTIONS]->(neighbor)
-               RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
-               UNION ALL
-               WITH node
-               MATCH (node)<-[r:!MENTIONS]-(neighbor)
-               RETURN neighbor.id + ' - ' + type(r) + ' -> ' + node.id AS output
-             }
-             RETURN output LIMIT 50
-             """,
-             {"query": generate_full_text_query(entity)},
-         )
-         result += "\n".join([el['output'] for el in response])
-     return result
-
- def retriever_neo4j(question: str):
-     structured_data = structured_retriever(question)
-     logging.debug(f"Structured data: {structured_data}")
-     return structured_data
-
- # Setup for condensing the follow-up questions
- _template = """Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question,
- in its original language.
- Chat History:
- {chat_history}
- Follow Up Input: {question}
- Standalone question:"""
-
- CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
-
- def _format_chat_history(chat_history: list[tuple[str, str]]) -> list:
-     buffer = []
-     for human, ai in chat_history:
-         buffer.append(HumanMessage(content=human))
-         buffer.append(AIMessage(content=ai))
-     return buffer
-
- _search_query = RunnableBranch(
-     (
-         RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
-             run_name="HasChatHistoryCheck"
-         ),
-         RunnablePassthrough.assign(
-             chat_history=lambda x: _format_chat_history(x["chat_history"])
-         )
-         | CONDENSE_QUESTION_PROMPT
-         | ChatOpenAI(temperature=0, api_key=os.environ['OPENAI_API_KEY'])
-         | StrOutputParser(),
-     ),
-     RunnableLambda(lambda x: x["question"]),
- )
-
-
+ # Define a prompt template for generating responses
  template = """I am a guide for Birmingham, Alabama. I can provide recommendations and insights about the city, including events and activities.
  Ask your question directly, and I'll provide a precise and quick,short and crisp response in a conversational way without any Greet.
  {context}
- Question: {question}
- Answer:"""
 
+ Data:
+ {context}
 
+ User's question: {question}
+ Answer:"""
  qa_prompt = ChatPromptTemplate.from_template(template)
 
- # Define the chain for Neo4j-based retrieval and response generation
- chain_neo4j = (
-     RunnableParallel(
-         {
-             "context": RunnableLambda(lambda x: retriever_neo4j(x["question"])),
-             "question": RunnablePassthrough(),
-         }
+ # Chat model configuration
+ chat_model = ChatOpenAI(temperature=0, model_name="gpt-4o", api_key=os.environ['OPENAI_API_KEY'])
+
+ # Function to generate a query for Neo4j and retrieve information
+ def generate_full_text_query(input: str) -> str:
+     return " ".join([f"{word}~2" for word in input.split()])
+
+ def retrieve_from_neo4j(question: str) -> str:
+     query = generate_full_text_query(question)
+     response = graph.query(
+         """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
+         YIELD node, score
+         RETURN node.name AS name, node.description AS description LIMIT 5""",
+         {"query": query}
      )
-     | ChatPromptTemplate.from_template("Answer: {context} Question: {question}")
-     | chat_model
-     | StrOutputParser()
- )
+     context = "\n".join([f"{el['name']}: {el['description']}" for el in response])
+     return context
 
- # Define the function to query Neo4j and get a response
+ # Function to generate the response using the prompt template and Neo4j data
  def get_response(question):
      try:
-         return chain_neo4j.invoke({"question": question})
+         context = retrieve_from_neo4j(question)
+         prompt = qa_prompt.format_prompt(context=context, question=question)
+         response = chat_model(prompt.to_string())
+         return response
      except Exception as e:
          return f"Error: {str(e)}"
 
-
-
  # Function to generate audio with Eleven Labs TTS
  def generate_audio_elevenlabs(text):
      XI_API_KEY = os.environ['ELEVENLABS_API']
      VOICE_ID = 'ehbJzYLQFpwbJmGkqbnW'
      tts_url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
-     headers = {
-         "Accept": "application/json",
-         "xi-api-key": XI_API_KEY
-     }
+     headers = {"Accept": "application/json", "xi-api-key": XI_API_KEY}
      data = {
          "text": str(text),
          "model_id": "eleven_multilingual_v2",
-         "voice_settings": {
-             "stability": 1.0,
-             "similarity_boost": 0.0,
-             "style": 0.60,
-             "use_speaker_boost": False
-         }
+         "voice_settings": {"stability": 1.0, "similarity_boost": 0.0}
      }
      response = requests.post(tts_url, headers=headers, json=data, stream=True)
      if response.ok:
@@ -190,7 +97,7 @@ def generate_audio_elevenlabs(text):
      else:
          return None
 
- # Define ASR model for speech-to-text
+ # Define the ASR model with Whisper
  model_id = 'openai/whisper-large-v3'
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
@@ -203,79 +110,34 @@ pipe_asr(
      tokenizer=processor.tokenizer,
      feature_extractor=processor.feature_extractor,
      max_new_tokens=128,
-     chunk_length_s=5,  # Process audio in 5-second chunks
+     chunk_length_s=15,
      batch_size=16,
      torch_dtype=torch_dtype,
      device=device,
-     return_timestamps=False
+     return_timestamps=True
  )
 
- # Function to handle audio transcription in real-time
- def transcribe_function(stream, new_chunk):
-     try:
-         sr, y = new_chunk[0], new_chunk[1]
-     except TypeError:
-         print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
-         return stream, "", None
-
-     # Ensure y is not empty and is at least 1-dimensional
-     if y is None or len(y) == 0:
-         return stream, "", None
-
+ # Define the function to transcribe audio and generate a response
+ def transcribe_and_respond(audio):
+     sr, y = audio[0], audio[1]
      y = y.astype(np.float32)
      max_abs_y = np.max(np.abs(y))
      if max_abs_y > 0:
          y = y / max_abs_y
-
-     # Ensure stream is also at least 1-dimensional before concatenation
-     if stream is not None and len(stream) > 0:
-         stream = np.concatenate([stream, y])
-     else:
-         stream = y
-
-     # Process the audio data for transcription
-     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
-     full_text = result.get("text", "")
-
-     # Start a thread to reset the state after 10 seconds
-     threading.Thread(target=auto_reset_state).start()
-
-     return stream, full_text, full_text
-
- # Define the Gradio interface
- with gr.Blocks(theme="rawrsor1/Everforest") as demo:
-     audio_input = gr.Audio(sources=["microphone"], type='numpy', streaming=True, label="Speak to Ask")
-     transcription_textbox = gr.Textbox(label="Transcription", interactive=False)
-     submit_voice_btn = gr.Button("Submit Voice")
-     clear_state_btn = gr.Button("Clear State")
-     audio_output = gr.Audio(label="Response Audio", type="filepath", autoplay=True, interactive=False)
-
-     # Initialize the stream as an empty array for the first input
-     stream = np.array([])
-
-     # Update the transcription text in real-time as the user speaks
-     audio_input.stream(
-         fn=lambda new_chunk: transcribe_function(stream, new_chunk),
-         inputs=audio_input,
-         outputs=[None, transcription_textbox, transcription_textbox]
-     )
+     result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
+     text = result.get("text", "")
+     response = get_response(text)
+     audio_path = generate_audio_elevenlabs(response)
+     return audio_path
+
+ with gr.Blocks() as demo:
+     audio_input = gr.Audio(sources=["microphone"], streaming=False, type='numpy', label="Speak to Ask")
+     audio_output = gr.Audio(label="Audio", type="filepath", autoplay=True, interactive=False)
 
-     # Placeholder function for handling submission
-     def handle_submit(text):
-         # Placeholder function, could trigger response generation or other actions
-         return f"You submitted: {text}"
-
-     # Handle the submission of the final transcribed text
-     submit_voice_btn.click(
-         fn=handle_submit,
-         inputs=transcription_textbox,
-         outputs=transcription_textbox
-     )
-
-     # Interaction for Clear State Button
-     clear_state_btn.click(
-         fn=lambda: "",
-         outputs=transcription_textbox
+     audio_input.change(
+         fn=transcribe_and_respond,
+         inputs=audio_input,
+         outputs=audio_output,
      )
 
      # Launch the Gradio interface
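Editorial note on the retrieval change: the commit drops the entity-extraction chain (Entities, entity_chain, structured_retriever) together with the remove_lucene_chars escaping helper, and instead sends the whole spoken question to the 'entity' full-text index with a ~2 fuzziness suffix on every token. A minimal sketch of what the new generate_full_text_query produces (standard library only; the sample questions are invented):

def generate_full_text_query(input: str) -> str:
    # Same one-liner as in the commit: fuzzy-match every whitespace-separated token.
    return " ".join([f"{word}~2" for word in input.split()])

if __name__ == "__main__":
    print(generate_full_text_query("events in Birmingham"))
    # events~2 in~2 Birmingham~2
    print(generate_full_text_query("what is on this weekend?"))
    # what~2 is~2 on~2 this~2 weekend?~2

Because the old escaping is gone, Lucene special characters such as '?', ':' or '"' now reach db.index.fulltext.queryNodes unescaped, which the full-text query parser may reject or misread; worth keeping in mind when testing with free-form speech transcripts.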
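The Cypher side changes shape as well: the old query expanded non-MENTIONS relationships around each extracted entity, while the new one returns node.name / node.description pairs directly. A standalone way to exercise the new query, assuming a reachable Neo4j instance that has an 'entity' full-text index and name/description properties on the indexed nodes (the import path, URI and password below are placeholders, not values taken from this commit):

from langchain_community.graphs import Neo4jGraph  # adjust to the import actually used in app.py

graph = Neo4jGraph(
    url="neo4j+s://<instance-id>.databases.neo4j.io",  # placeholder
    username="neo4j",
    password="<password>",  # placeholder
)

rows = graph.query(
    """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
    YIELD node, score
    RETURN node.name AS name, node.description AS description LIMIT 5""",
    {"query": "Birmingham~2"},
)

# Nodes that lack a description property come back as None and will be rendered
# literally as "None" in the context string built by retrieve_from_neo4j.
print("\n".join(f"{row['name']}: {row['description']}" for row in rows))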
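On the UI side, the streaming microphone plus transcription textbox is replaced by a single change-triggered round trip: record, transcribe with Whisper, answer via get_response, synthesize with ElevenLabs, play back. A self-contained sketch of the same wiring with the model calls stubbed out so it runs without any API keys (echo_respond is hypothetical; it simply writes the normalized recording to a WAV file so the type="filepath" output has something to play):

import tempfile
import wave

import gradio as gr
import numpy as np


def echo_respond(audio):
    # Stand-in for transcribe_and_respond: no ASR/LLM/TTS, just echo the recording.
    if audio is None:
        return None
    sr, y = audio  # type='numpy' delivers a (sample_rate, ndarray) tuple
    y = y.astype(np.float32)
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y
    pcm = (y * 32767).astype(np.int16)
    path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
    with wave.open(path, "wb") as f:
        f.setnchannels(1 if pcm.ndim == 1 else pcm.shape[1])
        f.setsampwidth(2)
        f.setframerate(int(sr))
        f.writeframes(pcm.tobytes())
    return path


with gr.Blocks() as demo:
    audio_input = gr.Audio(sources=["microphone"], streaming=False, type="numpy", label="Speak to Ask")
    audio_output = gr.Audio(label="Audio", type="filepath", autoplay=True, interactive=False)
    audio_input.change(fn=echo_respond, inputs=audio_input, outputs=audio_output)

demo.launch()

With streaming=False the callback only fires once the user stops recording, so the partial-transcription textbox from the previous version has no equivalent here.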