Update app.py
app.py CHANGED
@@ -173,7 +173,6 @@ class MultimodalQueryEngine(CustomQueryEngine):
         # return Response(response=str(llm_response), source_nodes=nodes, metadata={"text_nodes": text_nodes, "image_nodes": image_nodes})


-
 class MultimodalQueryEngine(CustomQueryEngine):
     qa_prompt: PromptTemplate
     retriever: BaseRetriever
@@ -203,7 +202,6 @@ class MultimodalQueryEngine(CustomQueryEngine):
             nodes, query_bundle=QueryBundle(query_str)
         )

-
         # create image nodes from the image associated with those nodes
         image_nodes = [
             NodeWithScore(node=ImageNode(image_path=n.node.metadata["image_path"]))
@@ -216,17 +214,20 @@ class MultimodalQueryEngine(CustomQueryEngine):
         )

         # prompt for the LLM
-        fmt_prompt = self.qa_prompt.format(
+        fmt_prompt = self.qa_prompt.format(
+            context_str=ctx_str, query_str=query_str, encoded_image_url=encoded_image_url
+        )

         # use the multimodal LLM to interpret images and generate a response to the prompt
-
+        llm_response = self.multi_modal_llm.complete(
             prompt=fmt_prompt,
             image_documents=[image_node.node for image_node in image_nodes],
         )
+
         return Response(
-            response=str(
+            response=str(llm_response),
             source_nodes=nodes,
-            metadata={"text_nodes":
+            metadata={"text_nodes": nodes, "image_nodes": image_nodes},
         )

 query_engine = MultimodalQueryEngine(QA_PROMPT, retriever, gpt_4o_mm)
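
For context, here is a minimal sketch of how the updated MultimodalQueryEngine reads once the hunks above are applied. Only the lines visible in the diff are taken verbatim; the imports, the type of the multi_modal_llm field, the retrieval and context-building step (nodes, ctx_str, encoded_image_url), and the tail of the image_nodes comprehension are not shown in the diff and are assumptions for illustration.

# Sketch assembled from the hunks above; the clearly marked parts are assumptions,
# not the actual contents of app.py.
from typing import Any

from llama_index.core.base.response.schema import Response
from llama_index.core.prompts import PromptTemplate
from llama_index.core.query_engine import CustomQueryEngine
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import ImageNode, NodeWithScore


class MultimodalQueryEngine(CustomQueryEngine):
    qa_prompt: PromptTemplate
    retriever: BaseRetriever
    multi_modal_llm: Any  # assumed field; the diff only shows self.multi_modal_llm.complete(...)

    def custom_query(self, query_str: str) -> Response:
        # Retrieval and context building are outside the diff; this is a plausible
        # stand-in, not the file's actual code.
        nodes = self.retriever.retrieve(query_str)
        ctx_str = "\n\n".join(n.node.get_content() for n in nodes)
        encoded_image_url = ""  # built elsewhere in app.py; not shown in the diff

        # create image nodes from the image associated with those nodes
        # (the loop/filter below is assumed; the diff shows only the first line)
        image_nodes = [
            NodeWithScore(node=ImageNode(image_path=n.node.metadata["image_path"]))
            for n in nodes
            if "image_path" in n.node.metadata
        ]

        # prompt for the LLM
        fmt_prompt = self.qa_prompt.format(
            context_str=ctx_str, query_str=query_str, encoded_image_url=encoded_image_url
        )

        # use the multimodal LLM to interpret images and generate a response to the prompt
        llm_response = self.multi_modal_llm.complete(
            prompt=fmt_prompt,
            image_documents=[image_node.node for image_node in image_nodes],
        )

        return Response(
            response=str(llm_response),
            source_nodes=nodes,
            metadata={"text_nodes": nodes, "image_nodes": image_nodes},
        )

Because CustomQueryEngine is Pydantic-based, the sketch would be wired up with keyword arguments, e.g. MultimodalQueryEngine(qa_prompt=QA_PROMPT, retriever=retriever, multi_modal_llm=gpt_4o_mm), and queried via query_engine.query("..."), which dispatches to custom_query.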