from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
from bs4 import BeautifulSoup
import arxiv
from PyPDF2 import PdfReader
from xml.etree import ElementTree
# Tools for browsing Hugging Face daily papers and reading them via arXiv.
@tool
def get_top_paper() -> str:
    """A tool that fetches the most upvoted paper on Hugging Face daily papers.
    """
    url = "https://huggingface.co/papers"
    try:
        res = requests.get(url)
        res.raise_for_status()
        # Parse the HTML response
        soup = BeautifulSoup(res.text, "html.parser")
        # The top paper's title sits in the first <h3> element on the page
        top_paper = soup.find("h3")
        if top_paper:
            return top_paper.text.strip()
        else:
            return "Paper not found"
    except Exception as e:
        return f"Error fetching top paper: {str(e)}"
@tool
def get_paper_link(title: str) -> str:
    """
    A tool that finds the Hugging Face paper link given its title.
    Args:
        title: A string representing the title of the paper (e.g., 'Competitive Programming with Large Reasoning Models').
    """
    url = "https://huggingface.co/papers"
    try:
        res = requests.get(url)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")
        # Each <h3> holds a paper title; the link lives on an <a> inside (or around) it
        for paper in soup.find_all("h3"):
            if paper.text.strip() == title:
                link = paper.find("a") or paper.find_parent("a")
                if link and link.get("href"):
                    return "https://huggingface.co" + link["href"]
        return "Paper link not found."
    except Exception as e:
        return f"Error fetching paper link: {str(e)}"
@tool
def get_paper_content(link: str) -> str:
    """
    A tool that reads the first four pages of a paper and returns its content as a string given its link.
    Args:
        link: A string representing the URL of the paper (e.g., 'https://huggingface.co/papers/2502.06807').
    """
    try:
        # The Hugging Face paper id doubles as the arXiv id
        paper_id = link.split("/papers/")[-1]
        paper = next(arxiv.Client().results(arxiv.Search(id_list=[paper_id])))
        # Derive the PDF URL of the paper from its arXiv abstract URL
        pdf_url = paper.entry_id.replace("abs", "pdf") + ".pdf"
        response = requests.get(pdf_url)
        response.raise_for_status()
        pdf_path = "temp_paper.pdf"
        with open(pdf_path, "wb") as file:
            file.write(response.content)
        # Extract text from the first four pages only
        content = ""
        reader = PdfReader(pdf_path)
        for page in reader.pages[:4]:
            content += page.extract_text()
        return content
    except Exception as e:
        return f"Error reading paper: {str(e)}"
@tool
def get_related_papers(title: str, max_results: int) -> list:
    """
    A tool that searches for related papers on arXiv based on the title of the query paper.
    Args:
        title: A string representing the title of the query paper to find related papers for.
        max_results: An integer representing the number of related papers to return.
    Returns:
        list: A list of dictionaries, each containing a related paper's title and URL.
    """
    try:
        # Let requests URL-encode the query parameters (titles contain spaces)
        search_url = "http://export.arxiv.org/api/query"
        params = {"search_query": f"title:{title}", "start": 0, "max_results": max_results}
        resp = requests.get(search_url, params=params)
        if resp.status_code != 200:
            return f"Error: Failed to retrieve papers from arXiv. Status code: {resp.status_code}"
        # The arXiv API returns an Atom feed; each <entry> element is one paper
        root = ElementTree.fromstring(resp.text)
        papers = []
        for entry in root.findall("{http://www.w3.org/2005/Atom}entry"):
            paper_title = entry.find("{http://www.w3.org/2005/Atom}title").text
            paper_url = entry.find("{http://www.w3.org/2005/Atom}id").text
            papers.append({"title": paper_title, "url": paper_url})
        return papers
    except Exception as e:
        return f"Error: {str(e)}"
MODEL_IDS = [
    'https://wxknx1kg971u7k1n.us-east-1.aws.endpoints.huggingface.cloud/',
    'https://jc26mwg228mkj8dw.us-east-1.aws.endpoints.huggingface.cloud/',
    # Add here whichever model endpoint is working for you
]
def is_model_overloaded(model_url):
    """Check whether the model is overloaded by making a test call."""
    try:
        response = requests.post(model_url, json={"inputs": "Test"})
        if response.status_code == 503:  # 503 Service Unavailable = overloaded
            return True
        return False
    except requests.RequestException:
        return True  # if the request errors out, treat the model as overloaded
def get_available_model():
    """Select the first available model from the list."""
    for model_url in MODEL_IDS:
        if not is_model_overloaded(model_url):
            return model_url
    return MODEL_IDS[0]  # if all are failing, use the first model by default
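# Failover sketch: with the MODEL_IDS above, a 503 (or a request error) from
# the first endpoint sends selection on to the second; only if every probe
# fails does the code fall back to MODEL_IDS[0].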
selected_model_id = get_available_model()
final_answer = FinalAnswerTool()
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id=selected_model_id,
    custom_role_conversions=None,
)
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)
agent = CodeAgent(
    model=model,
    tools=[final_answer, get_top_paper, get_paper_link, get_paper_content, get_related_papers],  # add your tools here (don't remove final_answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)
GradioUI(agent).launch()