from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
from bs4 import BeautifulSoup
import arxiv
from PyPDF2 import PdfReader
from xml.etree import ElementTree

# Tools for browsing the Hugging Face daily papers page and reading papers via arXiv.
@tool
def get_top_paper() -> str:
    """A tool that fetches the most upvoted paper on Hugging Face daily papers.
    """
    url = "https://huggingface.co/papers"
    try:
        res = requests.get(url)
        res.raise_for_status()
        # Parse the HTML response
        soup = BeautifulSoup(res.text, "html.parser")
        # The first <h3> on the page holds the top paper's title
        top_paper = soup.find("h3")
        if top_paper:
            return top_paper.text.strip()
        else:
            return "Paper not found"
    except Exception as e:
        return f"Error fetching top paper: {str(e)}"
@tool
def get_paper_link(title: str) -> str:
    """
    A tool that finds the Hugging Face paper link given its title.
    Args:
        title: A string representing the title of the paper (e.g., 'Competitive Programming with Large Reasoning Models').
    """
    url = "https://huggingface.co/papers"
    try:
        res = requests.get(url)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")
        paper_headings = soup.find_all("h3")
        for paper in paper_headings:
            if paper.text.strip() == title:
                # The href sits on an <a> tag inside (or wrapping) the <h3>
                anchor = paper.find("a") or paper.find_parent("a")
                if anchor and anchor.has_attr("href"):
                    return "https://huggingface.co" + anchor["href"]
        return "Paper link not found."
    except Exception as e:
        return f"Error fetching paper link: {str(e)}"
@tool
def get_paper_content(link: str) -> str:
    """
    A tool that reads the first four pages of a paper and returns its content as a string given its link.
    Args:
        link: A string representing the URL of the paper (e.g., 'https://huggingface.co/papers/2502.06807').
    """
    try:
        # Extract the arXiv id from the Hugging Face URL (daily papers reuse arXiv ids)
        paper_id = link.split("/papers/")[-1]
        paper = next(arxiv.Client().results(arxiv.Search(id_list=[paper_id])))
        # Build the PDF URL of the paper from its arXiv entry
        pdf_url = paper.entry_id.replace("abs", "pdf") + ".pdf"
        response = requests.get(pdf_url)
        response.raise_for_status()
        pdf_path = "temp_paper.pdf"
        with open(pdf_path, "wb") as file:
            file.write(response.content)
        # Read only the first four pages to keep the returned context small
        content = ""
        reader = PdfReader(pdf_path)
        pages = reader.pages[:4]
        for page in pages:
            content += page.extract_text()
        return content
    except Exception as e:
        return f"Error reading paper: {str(e)}"
@tool
def get_related_papers(title: str, max_results: int) -> list:
    """
    A tool that searches for related papers on arXiv based on the title of the query paper.
    Args:
        title: A string representing the title of the query paper to find related papers for.
        max_results: An integer representing the number of related papers to return.
    Returns:
        list: A list of dictionaries, each containing a related paper's title and URL.
    """
    try:
        # Let requests URL-encode the title (it may contain spaces and punctuation)
        search_url = "http://export.arxiv.org/api/query"
        params = {"search_query": f"title:{title}", "start": 0, "max_results": max_results}
        resp = requests.get(search_url, params=params)
        if resp.status_code != 200:
            return f"Error: Failed to retrieve papers from arXiv. Status code: {resp.status_code}"
        # The arXiv API returns an Atom feed; collect each <entry>'s title and id
        root = ElementTree.fromstring(resp.text)
        papers = []
        for entry in root.findall("{http://www.w3.org/2005/Atom}entry"):
            paper_title = entry.find("{http://www.w3.org/2005/Atom}title").text
            paper_url = entry.find("{http://www.w3.org/2005/Atom}id").text
            papers.append({"title": paper_title, "url": paper_url})
        return papers
    except Exception as e:
        return f"Error: {str(e)}"
MODEL_IDS = [
    'https://wxknx1kg971u7k1n.us-east-1.aws.endpoints.huggingface.cloud/',
    'https://jc26mwg228mkj8dw.us-east-1.aws.endpoints.huggingface.cloud/',
    # Add here whichever endpoint is working for you
]

def is_model_overloaded(model_url):
    """Check whether the model is overloaded by making a small test call."""
    try:
        # Time out so a hung endpoint doesn't block startup
        response = requests.post(model_url, json={"inputs": "Test"}, timeout=10)
        if response.status_code == 503:  # 503 Service Unavailable = overloaded
            return True
        return False
    except requests.RequestException:
        return True  # treat any request error as overloaded

def get_available_model():
    """Select the first available model from the list."""
    for model_url in MODEL_IDS:
        if not is_model_overloaded(model_url):
            return model_url
    return MODEL_IDS[0]  # if all are failing, fall back to the first model by default
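
# Illustrative behavior (assuming the first endpoint returns 503 and the second answers):
# >>> get_available_model()
# 'https://jc26mwg228mkj8dw.us-east-1.aws.endpoints.huggingface.cloud/'
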
selected_model_id = get_available_model()

final_answer = FinalAnswerTool()

model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id=selected_model_id,
    custom_role_conversions=None,
)

with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    tools=[final_answer, get_top_paper, get_paper_link, get_paper_content, get_related_papers],  # add your tools here (don't remove final_answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

GradioUI(agent).launch()