added a tool for downloading arxiv papers
mixtral_agent.py  CHANGED  (+47 -3)
@@ -19,7 +19,7 @@ from typing import List, Dict
 from datetime import datetime
 from langchain.tools.render import render_text_description
 import os
-
+import arxiv

 import dotenv

@@ -39,6 +39,12 @@ prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")

 arxiv_retriever = ArxivRetriever(load_max_docs=2)

+from zipfile import ZipFile
+
+def unzip_file(zip_file: str, extract_to: str) -> None:
+    with ZipFile(zip_file, 'r') as zip_ref:
+        zip_ref.extractall(extract_to)
+


 def format_info_list(info_list: List[Dict[str, str]]) -> str:
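A quick usage sketch for the new unzip_file helper; the archive and destination paths below are hypothetical, not files created by this commit:

import os
from zipfile import ZipFile

def unzip_file(zip_file: str, extract_to: str) -> None:
    # Extract every member of a .zip archive into the target directory.
    with ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(extract_to)

# Hypothetical paths: unpack a local .zip archive next to the script.
os.makedirs("./mydir", exist_ok=True)
unzip_file("./mydir/paper-sources.zip", "./mydir/paper-sources")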
@@ -98,10 +104,46 @@ def google_search(query: str) -> str:
     return cleaner_sources.__str__()
     # return organic_source

+@tool
+def get_arxiv_paper(paper_id: str) -> None:
+    """Download a paper from arxiv. To download a paper, input only the arxiv id,
+    such as "1605.08386v1" or "2312.02813". This tool is named get_arxiv_paper.
+    Inputting a full URL such as "http://arxiv.org/abs/2312.02813" will break the code.
+    Please download one paper at a time and keep the input/output free of additional
+    information; it should contain only the id.
+    """
+    paper = next(arxiv.Client().results(arxiv.Search(id_list=[paper_id])))
+
+    number_without_period = paper_id.replace('.', '')
+
+    # Download the source archive to the PWD with a default filename.
+    # paper.download_source()
+    # Download the source archive to the PWD with a custom filename.
+    # paper.download_source(filename="downloaded-paper.tar.gz")
+    # Download the PDF to the PWD with a custom filename.
+    # paper.download_pdf(filename="downloaded-paper.pdf")
+    # Download the PDF to a specified directory with a custom filename.
+    paper.download_pdf(dirpath="./mydir", filename=f"{number_without_period}.pdf")
+
+    # file_name = number_without_period + ".tar.gz"
+    # dir_path = "./mydir"
+    # paper.download_source(dirpath=dir_path, filename=file_name)
+
+    # complete_path = dir_path + "/" + file_name
+
+    # unzip_file(complete_path, number_without_period)
+



-tools = [
+tools = [
+    arxiv_search,
+    google_search,
+    get_arxiv_paper,
+]

 # tools = [
 #     create_retriever_tool(
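Outside the agent loop, the download path used by the new get_arxiv_paper tool can be exercised directly with the arxiv package; a minimal sketch, assuming the package is installed and using "2312.02813" only as a stand-in id:

import os
import arxiv

paper_id = "2312.02813"  # bare arxiv id, never a URL

# Look the paper up by id and take the single result.
search = arxiv.Search(id_list=[paper_id])
paper = next(arxiv.Client().results(search))

# Same filename convention as the tool: the id with the '.' removed.
number_without_period = paper_id.replace('.', '')

# Download the PDF into ./mydir (created here so the call cannot fail on a missing directory).
os.makedirs("./mydir", exist_ok=True)
paper.download_pdf(dirpath="./mydir", filename=f"{number_without_period}.pdf")
print(paper.title, paper.entry_id)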
@@ -157,7 +199,9 @@ if __name__ == "__main__":
     input = agent_executor.invoke(
         {
             "input": "How to generate videos from images using state of the art machine learning models; Using the arxiv retriever " +
-            "add the urls of the papers used in the final answer using the metadata from the retriever please do not use '`' "
+            "add the urls of the papers used in the final answer using the metadata from the retriever please do not use '`' " +
+            "please use the `get_arxiv_paper` tool to download any arxiv paper you find " +
+            "Please only use the tools provided to you"
             # f"Please prioritize the newest papers this is the current date {get_current_date()}"
         }
     )
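The agent wiring itself is not part of this diff; a rough sketch of how a tool list like this is commonly surfaced to a ReAct-style prompt with render_text_description (the @tool import path, prompt variables, and the one-tool list here are assumptions, not code from mixtral_agent.py):

from langchain_core.tools import tool
from langchain.tools.render import render_text_description

@tool
def get_arxiv_paper(paper_id: str) -> None:
    """Download the arxiv paper with the given bare id, for example 2312.02813."""
    ...

tools = [get_arxiv_paper]

# One "name: description" line per tool, ready to interpolate into the agent prompt,
# plus the comma-separated tool names the agent is allowed to call.
rendered_tools = render_text_description(tools)
tool_names = ", ".join(t.name for t in tools)
print(rendered_tools)
print(tool_names)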
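Since the invocation asks the agent to cite paper URLs "using the metadata from the retriever", the fields ArxivRetriever exposes can be inspected directly; a small sketch, assuming the langchain_community import path and the "Entry ID"/"Title" metadata keys (key names vary across langchain versions, hence the defensive .get calls):

from langchain_community.retrievers import ArxivRetriever

arxiv_retriever = ArxivRetriever(load_max_docs=2)

# Pull a couple of papers for the same topic as the agent prompt and print
# the metadata fields the final answer is supposed to cite.
docs = arxiv_retriever.get_relevant_documents("video generation from images")
for doc in docs:
    print(doc.metadata.get("Entry ID"), "-", doc.metadata.get("Title"))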