Spaces:
Sleeping
Sleeping
Update agentic.py
Browse files- agentic.py +88 -94
agentic.py
CHANGED
@@ -16,6 +16,9 @@ from langchain_google_genai import ChatGoogleGenerativeAI
|
|
16 |
from langfuse.callback import CallbackHandler
|
17 |
|
18 |
import base64
|
|
|
|
|
|
|
19 |
|
20 |
# import boto3
|
21 |
|
@@ -47,7 +50,7 @@ def get_assistant_model():
|
|
47 |
|
48 |
if llm_provider == "mistral":
|
49 |
assistant_model = ChatMistralAI(
|
50 |
-
model="mistral-small-latest",#"ministral-8b-latest",#
|
51 |
temperature=0,
|
52 |
max_retries=2,
|
53 |
api_key=os.getenv("MISTRAL_API_KEY")
|
@@ -104,12 +107,6 @@ def get_video_handler_model():
|
|
104 |
|
105 |
return video_handler_model
|
106 |
|
107 |
-
# reviewer_model = ChatMistralAI(
|
108 |
-
# model="mistral-small-latest",
|
109 |
-
# temperature=0,
|
110 |
-
# max_retries=2,
|
111 |
-
# api_key=os.getenv("MISTRAL_API_KEY"),
|
112 |
-
# )
|
113 |
|
114 |
def download_youtube_content(url: str, output_path: Optional[str] = None) -> None:
|
115 |
"""
|
@@ -262,7 +259,7 @@ def vision_model_call(state: State) -> str:
|
|
262 |
print(error_msg)
|
263 |
return ""
|
264 |
|
265 |
-
def video_handler_mode_call(state: State, video_url: str) -> str:
|
266 |
"""
|
267 |
Video handler model that can analyze videos and answer questions about them.
|
268 |
|
@@ -322,28 +319,6 @@ def video_handler_mode_call(state: State, video_url: str) -> str:
|
|
322 |
)
|
323 |
|
324 |
return response.content + "\n\n"
|
325 |
-
|
326 |
-
# def router(state: State)-> Literal["OK", "retry", "failed"]:
|
327 |
-
# """Determine the next step based on FINAL ANSWER"""
|
328 |
-
|
329 |
-
# print(f"MESSAGES : {state['messages'][-1].content}")
|
330 |
-
# print(f"STATE : {state['system_prompt']}")
|
331 |
-
# if True:
|
332 |
-
# return "OK"
|
333 |
-
# else:
|
334 |
-
# if state.status == "ERROR":
|
335 |
-
# if state.error_count >= 3:
|
336 |
-
# return "failed"
|
337 |
-
# return "retry"
|
338 |
-
|
339 |
-
# def reviewer_validation(state: State) -> Literal["OK", "NOK"]:
|
340 |
-
# print(f"MESSAGES : {state['messages'][-1].content}")
|
341 |
-
# print(f"STATE : {state}")
|
342 |
-
# return "OK"
|
343 |
-
# if True:
|
344 |
-
# return "OK"
|
345 |
-
# else:
|
346 |
-
# return "NOK"
|
347 |
|
348 |
# Tools
|
349 |
tools = [
|
@@ -351,11 +326,10 @@ tools = [
|
|
351 |
# search_webpage,
|
352 |
wikipedia_search,
|
353 |
vision_model_call,
|
354 |
-
|
355 |
]
|
356 |
|
357 |
assistant_with_tools = assistant_model.bind_tools(tools, parallel_tool_calls=False)
|
358 |
-
# reviewer_with_tools = reviewer_model.bind_tools(tools, parallel_tool_calls=False)
|
359 |
|
360 |
|
361 |
def assistant(state: State)-> str:
|
@@ -382,22 +356,18 @@ def assistant(state: State)-> str:
|
|
382 |
Returns:
|
383 |
{vision_model_call.__annotations__['return']}
|
384 |
|
385 |
-
|
386 |
-
{video_handler_mode_call.__doc__}
|
387 |
Args:
|
388 |
-
{video_handler_mode_call.__annotations__}
|
389 |
Returns:
|
390 |
-
{video_handler_mode_call.__annotations__['return']}
|
391 |
"""
|
392 |
|
393 |
with open("./prompt.txt", "r") as prompt_file:
|
394 |
system_prompt = prompt_file.read()
|
395 |
|
396 |
-
|
397 |
-
|
398 |
-
video_handler_prompt = "If the question is about a video and if you need to analyze a video, you must use the video_handler_mode_call tool."
|
399 |
-
|
400 |
-
sys_msg = SystemMessage(content=system_prompt+file_prompt+video_handler_prompt+textual_description_of_tool)
|
401 |
|
402 |
response = [assistant_with_tools.invoke([sys_msg] + state["messages"])]
|
403 |
|
@@ -408,29 +378,6 @@ def assistant(state: State)-> str:
|
|
408 |
"answer": state.get("answer", "")
|
409 |
}
|
410 |
|
411 |
-
# def reviewer(state: State)-> str:
|
412 |
-
|
413 |
-
# with open("./prompt.txt", "r") as prompt_file:
|
414 |
-
# system_prompt = prompt_file.read()
|
415 |
-
|
416 |
-
# sys_msg = SystemMessage(content=f"""
|
417 |
-
# You are a powerful AI assistant reviewer.
|
418 |
-
# You must review the answer of the previous assistant.
|
419 |
-
# If you need you can correct the answer.
|
420 |
-
# You need to make sure the answer is formatted correctly.
|
421 |
-
# The response should not be sent if FINAL ANSWER: is not registered and does not respect the constraints of the initial prompt.
|
422 |
-
|
423 |
-
# Here is the prompt of the previous assistant :
|
424 |
-
# {system_prompt}
|
425 |
-
|
426 |
-
# Here is the question: {state['question']}
|
427 |
-
# """)
|
428 |
-
|
429 |
-
# response = [reviewer_with_tools.invoke([sys_msg] + state["messages"])]
|
430 |
-
|
431 |
-
# return {
|
432 |
-
# "messages": response,
|
433 |
-
# }
|
434 |
|
435 |
def build_graph():
|
436 |
builder = StateGraph(State)
|
@@ -448,25 +395,6 @@ def build_graph():
|
|
448 |
tools_condition,
|
449 |
)
|
450 |
builder.add_edge("tools", "assistant")
|
451 |
-
# builder.add_edge("assistant", "reviewer")
|
452 |
-
|
453 |
-
# builder.add_conditional_edges(
|
454 |
-
# "assistant",
|
455 |
-
# router,
|
456 |
-
# {
|
457 |
-
# "OK": "reviewer",
|
458 |
-
# "retry": "assistant",
|
459 |
-
# "failed": END
|
460 |
-
# }
|
461 |
-
# )
|
462 |
-
# builder.add_conditional_edges(
|
463 |
-
# "reviewer",
|
464 |
-
# reviewer_validation,
|
465 |
-
# {
|
466 |
-
# "OK": END,
|
467 |
-
# "NOK": "assistant"
|
468 |
-
# }
|
469 |
-
# )
|
470 |
|
471 |
return builder.compile()
|
472 |
|
@@ -476,15 +404,11 @@ if __name__ == "__main__":
|
|
476 |
|
477 |
file_name = ""
|
478 |
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
# question = "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
|
485 |
-
# question = "Review the chess position provided in the image. It is black's turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation."
|
486 |
-
# question = "Can you describe the image ?"
|
487 |
-
# file_name = "images/cca530fc-4052-43b2-b130-b30968d8aa44.png"
|
488 |
|
489 |
messages = [HumanMessage(content=f"Can you answer this question please ? {question}")]
|
490 |
|
@@ -505,6 +429,76 @@ if __name__ == "__main__":
|
|
505 |
except:
|
506 |
regex_result = re.search(r"\s*(?P<answer>.*)$", messages['messages'][-1].content)
|
507 |
answer = regex_result.group("answer")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
508 |
|
509 |
-
|
510 |
-
# from pprint import pprint
|
|
|
16 |
from langfuse.callback import CallbackHandler
|
17 |
|
18 |
import base64
|
19 |
+
import json
|
20 |
+
import time
|
21 |
+
from pprint import pprint
|
22 |
|
23 |
# import boto3
|
24 |
|
|
|
50 |
|
51 |
if llm_provider == "mistral":
|
52 |
assistant_model = ChatMistralAI(
|
53 |
+
model="mistral-large-2411",#"mistral-small-latest",#"ministral-8b-latest",#
|
54 |
temperature=0,
|
55 |
max_retries=2,
|
56 |
api_key=os.getenv("MISTRAL_API_KEY")
|
|
|
107 |
|
108 |
return video_handler_model
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
def download_youtube_content(url: str, output_path: Optional[str] = None) -> None:
|
112 |
"""
|
|
|
259 |
print(error_msg)
|
260 |
return ""
|
261 |
|
262 |
+
def video_handler_model_call(state: State, video_url: str) -> str:
|
263 |
"""
|
264 |
Video handler model that can analyze videos and answer questions about them.
|
265 |
|
|
|
319 |
)
|
320 |
|
321 |
return response.content + "\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
322 |
|
323 |
# Tools
|
324 |
tools = [
|
|
|
326 |
# search_webpage,
|
327 |
wikipedia_search,
|
328 |
vision_model_call,
|
329 |
+
video_handler_model_call
|
330 |
]
|
331 |
|
332 |
assistant_with_tools = assistant_model.bind_tools(tools, parallel_tool_calls=False)
|
|
|
333 |
|
334 |
|
335 |
def assistant(state: State)-> str:
|
|
|
356 |
Returns:
|
357 |
{vision_model_call.__annotations__['return']}
|
358 |
|
359 |
+
video_handler_model_call:
|
360 |
+
{video_handler_model_call.__doc__}
|
361 |
Args:
|
362 |
+
{video_handler_model_call.__annotations__}
|
363 |
Returns:
|
364 |
+
{video_handler_model_call.__annotations__['return']}
|
365 |
"""
|
366 |
|
367 |
with open("./prompt.txt", "r") as prompt_file:
|
368 |
system_prompt = prompt_file.read()
|
369 |
|
370 |
+
sys_msg = SystemMessage(content=system_prompt+textual_description_of_tool)
|
|
|
|
|
|
|
|
|
371 |
|
372 |
response = [assistant_with_tools.invoke([sys_msg] + state["messages"])]
|
373 |
|
|
|
378 |
"answer": state.get("answer", "")
|
379 |
}
|
380 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
381 |
|
382 |
def build_graph():
|
383 |
builder = StateGraph(State)
|
|
|
395 |
tools_condition,
|
396 |
)
|
397 |
builder.add_edge("tools", "assistant")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
398 |
|
399 |
return builder.compile()
|
400 |
|
|
|
404 |
|
405 |
file_name = ""
|
406 |
|
407 |
+
question = "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?"
|
408 |
+
# file_name = question_json.get("file_name", "")
|
409 |
+
|
410 |
+
print(f"QUESTION : {question}")
|
411 |
+
print(f"FILE: {file_name}")
|
|
|
|
|
|
|
|
|
412 |
|
413 |
messages = [HumanMessage(content=f"Can you answer this question please ? {question}")]
|
414 |
|
|
|
429 |
except:
|
430 |
regex_result = re.search(r"\s*(?P<answer>.*)$", messages['messages'][-1].content)
|
431 |
answer = regex_result.group("answer")
|
432 |
+
|
433 |
+
print(answer)
|
434 |
+
|
435 |
+
# with open("./questions.json", "r") as file:
|
436 |
+
# questions_json = json.loads(file.read())
|
437 |
+
|
438 |
+
# for question_json in questions_json:
|
439 |
+
# question = question_json.get("question", "")
|
440 |
+
# file_name = question_json.get("file_name", "")
|
441 |
+
|
442 |
+
# print(f"QUESTION : {question}")
|
443 |
+
# print(f"FILE: {file_name}")
|
444 |
+
|
445 |
+
# messages = [HumanMessage(content=f"Can you answer this question please ? {question}")]
|
446 |
+
|
447 |
+
# messages = agent_graph.invoke(
|
448 |
+
# input={"messages": messages, "question": question, "input_file": file_name},
|
449 |
+
# config={
|
450 |
+
# "recursion_limit": 10,
|
451 |
+
# "callbacks": [langfuse_handler]
|
452 |
+
# }
|
453 |
+
# )
|
454 |
+
|
455 |
+
# for m in messages['messages']:
|
456 |
+
# m.pretty_print()
|
457 |
+
|
458 |
+
# try:
|
459 |
+
# regex_result = re.search(r"FINAL ANSWER:\s*(?P<answer>.*)$", messages['messages'][-1].content)
|
460 |
+
# answer = regex_result.group("answer")
|
461 |
+
# except:
|
462 |
+
# regex_result = re.search(r"\s*(?P<answer>.*)$", messages['messages'][-1].content)
|
463 |
+
# answer = regex_result.group("answer")
|
464 |
+
|
465 |
+
# print(answer)
|
466 |
+
# time.sleep(10)
|
467 |
+
|
468 |
+
# def test():
|
469 |
+
# # System message
|
470 |
+
# textual_description_of_tool=f"""
|
471 |
+
# web_search:
|
472 |
+
# {web_search.description}
|
473 |
+
# Args:
|
474 |
+
# {web_search.args_schema.__doc__}
|
475 |
+
# {web_search.args_schema.__annotations__}
|
476 |
+
# Returns:
|
477 |
+
# response_format: {web_search.response_format}
|
478 |
+
|
479 |
+
# wikipedia_search:
|
480 |
+
# {wikipedia_search.description}
|
481 |
+
# Args:
|
482 |
+
# {wikipedia_search.args_schema.__doc__}
|
483 |
+
# {wikipedia_search.args_schema.__annotations__}
|
484 |
+
# Returns:
|
485 |
+
# response_format: {wikipedia_search.response_format}
|
486 |
+
|
487 |
+
# vision_model_call:
|
488 |
+
# {vision_model_call.__doc__}
|
489 |
+
# Args:
|
490 |
+
# {vision_model_call.__annotations__}
|
491 |
+
# Returns:
|
492 |
+
# {vision_model_call.__annotations__['return']}
|
493 |
+
|
494 |
+
# video_handler_model_call:
|
495 |
+
# {video_handler_model_call.__doc__}
|
496 |
+
# Args:
|
497 |
+
# {video_handler_model_call.__annotations__}
|
498 |
+
# Returns:
|
499 |
+
# {video_handler_model_call.__annotations__['return']}
|
500 |
+
# """
|
501 |
+
|
502 |
+
# pprint(textual_description_of_tool)
|
503 |
|
504 |
+
# pprint(web_search.__annotations__)
|
|