isayahc commited on
Commit
49dc05d
·
1 Parent(s): c47b75b

added a tool for downloading arxiv papers

Browse files
Files changed (1) hide show
  1. mixtral_agent.py +47 -3
mixtral_agent.py CHANGED
@@ -19,7 +19,7 @@ from typing import List, Dict
19
  from datetime import datetime
20
  from langchain.tools.render import render_text_description
21
  import os
22
-
23
 
24
  import dotenv
25
 
@@ -39,6 +39,12 @@ prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
39
 
40
  arxiv_retriever = ArxivRetriever(load_max_docs=2)
41
 
 
 
 
 
 
 
42
 
43
 
44
  def format_info_list(info_list: List[Dict[str, str]]) -> str:
@@ -98,10 +104,46 @@ def google_search(query: str) -> str:
98
  return cleaner_sources.__str__()
99
  # return organic_source
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
 
103
 
104
- tools = [arxiv_search,google_search]
 
 
 
 
105
 
106
  # tools = [
107
  # create_retriever_tool(
@@ -157,7 +199,9 @@ if __name__ == "__main__":
157
  input = agent_executor.invoke(
158
  {
159
  "input": "How to generate videos from images using state of the art macchine learning models; Using the axriv retriever " +
160
- "add the urls of the papers used in the final answer using the metadata from the retriever please do not use '`' "
 
 
161
  # f"Please prioritize the newest papers this is the current data {get_current_date()}"
162
  }
163
  )
 
19
  from datetime import datetime
20
  from langchain.tools.render import render_text_description
21
  import os
22
+ import arxiv
23
 
24
  import dotenv
25
 
 
39
 
40
  arxiv_retriever = ArxivRetriever(load_max_docs=2)
41
 
42
+ from zipfile import ZipFile
43
+
44
def unzip_file(zip_file: str, extract_to: str) -> None:
    """Extract every member of the zip archive at *zip_file* into *extract_to*."""
    with ZipFile(file=zip_file, mode="r") as archive:
        archive.extractall(path=extract_to)
47
+
48
 
49
 
50
  def format_info_list(info_list: List[Dict[str, str]]) -> str:
 
104
  return cleaner_sources.__str__()
105
  # return organic_source
106
 
107
@tool
def get_arxiv_paper(paper_id:str) -> None:
    """Download a paper from arxiv as a PDF. This tool is named get_arxiv_paper.

    To download a paper, input only the arxiv id, such as "1605.08386v1" or
    "2312.02813". Do not input a full URL such as
    "http://arxiv.org/abs/2312.02813"; this will break the code.
    Please download one paper at a time, and keep the input free of
    additional information: only have the id.
    """
    # NOTE: the docstring above is what the agent sees as the tool description,
    # so it is written for the model rather than for maintainers.
    download_dir = "./mydir"

    # Look the paper up by id; a default of None avoids a bare StopIteration
    # leaking out when the search yields no result.
    paper = next(arxiv.Client().results(arxiv.Search(id_list=[paper_id])), None)
    if paper is None:
        raise ValueError(f"No arxiv paper found for id {paper_id!r}")

    # The target directory is not guaranteed to exist on a fresh checkout.
    os.makedirs(download_dir, exist_ok=True)

    # Periods are stripped so the only '.' in the file name is the extension.
    number_without_period = paper_id.replace('.', '')

    # Save the PDF as ./mydir/<id without periods>.pdf
    paper.download_pdf(dirpath=download_dir, filename=f"{number_without_period}.pdf")
137
+
138
+
139
 
140
 
141
 
142
# Tools made available to the agent: arxiv search, Google search, and PDF download.
tools = [arxiv_search, google_search, get_arxiv_paper]
147
 
148
  # tools = [
149
  # create_retriever_tool(
 
199
  input = agent_executor.invoke(
200
  {
201
  "input": "How to generate videos from images using state of the art macchine learning models; Using the axriv retriever " +
202
+ "add the urls of the papers used in the final answer using the metadata from the retriever please do not use '`' " +
203
+ "please use the `download_arxiv_paper` tool to download any axriv paper you find" +
204
+ "Please only use the tools provided to you"
205
  # f"Please prioritize the newest papers this is the current data {get_current_date()}"
206
  }
207
  )