Towhidul committed · verified
Commit afc80d1 · 1 Parent(s): ed4a009

Update app.py
Files changed (1): app.py (+7 -6)
app.py CHANGED

```diff
@@ -173,7 +173,6 @@ class MultimodalQueryEngine(CustomQueryEngine):
         # return Response(response=str(llm_response), source_nodes=nodes, metadata={"text_nodes": text_nodes, "image_nodes": image_nodes})
 
 
-
 class MultimodalQueryEngine(CustomQueryEngine):
     qa_prompt: PromptTemplate
     retriever: BaseRetriever
@@ -203,7 +202,6 @@ class MultimodalQueryEngine(CustomQueryEngine):
             nodes, query_bundle=QueryBundle(query_str)
         )
 
-
         # create image nodes from the image associated with those nodes
         image_nodes = [
             NodeWithScore(node=ImageNode(image_path=n.node.metadata["image_path"]))
@@ -216,17 +214,20 @@ class MultimodalQueryEngine(CustomQueryEngine):
         )
 
         # prompt for the LLM
-        fmt_prompt = self.qa_prompt.format(context_str=ctx_str, query_str=query_str,encoded_image_url=encoded_image_url)
+        fmt_prompt = self.qa_prompt.format(
+            context_str=ctx_str, query_str=query_str, encoded_image_url=encoded_image_url
+        )
 
         # use the multimodal LLM to interpret images and generate a response to the prompt
-        llm_repsonse = self.multi_modal_llm.complete(
+        llm_response = self.multi_modal_llm.complete(
             prompt=fmt_prompt,
             image_documents=[image_node.node for image_node in image_nodes],
         )
+
         return Response(
-            response=str(llm_repsonse),
+            response=str(llm_response),
             source_nodes=nodes,
-            metadata={"text_nodes": text_nodes, "image_nodes": image_nodes},
+            metadata={"text_nodes": nodes, "image_nodes": image_nodes},
         )
 
 query_engine = MultimodalQueryEngine(QA_PROMPT, retriever, gpt_4o_mm)
```
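
In short, the commit reformats the `qa_prompt.format(...)` call, fixes the `llm_repsonse` → `llm_response` typo, and stores the retrieved `nodes` (rather than `text_nodes`) in the response metadata. Below is a minimal sketch of how the patched `custom_query` method plausibly fits together after this change; it is not the exact app.py. The import paths, the `multi_modal_llm: MultiModalLLM` field annotation, the `image_path` filter, and the construction of `ctx_str` and `encoded_image_url` are assumptions reconstructed from the surrounding diff context.

```python
# Hedged sketch of the patched engine, assuming the llama_index.core layout;
# import paths and helper details may differ from the actual app.py.
from llama_index.core import PromptTemplate
from llama_index.core.base.response.schema import Response
from llama_index.core.multi_modal_llms import MultiModalLLM
from llama_index.core.query_engine import CustomQueryEngine
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import ImageNode, NodeWithScore


class MultimodalQueryEngine(CustomQueryEngine):
    qa_prompt: PromptTemplate
    retriever: BaseRetriever
    multi_modal_llm: MultiModalLLM  # assumed field name, matching self.multi_modal_llm in the diff

    def custom_query(self, query_str: str) -> Response:
        # retrieve text nodes for the query (the re-ranking step visible in the
        # diff context is elided here)
        nodes = self.retriever.retrieve(query_str)

        # create image nodes from the image associated with those nodes
        image_nodes = [
            NodeWithScore(node=ImageNode(image_path=n.node.metadata["image_path"]))
            for n in nodes
            if "image_path" in n.node.metadata  # assumed guard, not shown in the diff
        ]

        # assumed: build the text context and the encoded image URL used by the prompt
        ctx_str = "\n\n".join(n.node.get_content() for n in nodes)
        encoded_image_url = ""  # placeholder; produced elsewhere in app.py

        # prompt for the LLM (call reformatted in this commit)
        fmt_prompt = self.qa_prompt.format(
            context_str=ctx_str, query_str=query_str, encoded_image_url=encoded_image_url
        )

        # use the multimodal LLM to interpret images and generate a response to the prompt
        llm_response = self.multi_modal_llm.complete(
            prompt=fmt_prompt,
            image_documents=[image_node.node for image_node in image_nodes],
        )

        return Response(
            response=str(llm_response),
            source_nodes=nodes,
            metadata={"text_nodes": nodes, "image_nodes": image_nodes},
        )
```

The final context line of the diff then wires this engine up as `query_engine = MultimodalQueryEngine(QA_PROMPT, retriever, gpt_4o_mm)`, i.e. with the QA prompt, the retriever, and the GPT-4o multimodal LLM.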