Spaces:
Sleeping
Sleeping
from smolagents import CodeAgent,DuckDuckGoSearchTool,HfApiModel,load_tool,tool | |
import datetime | |
import requests | |
import pytz | |
import yaml | |
from tools.final_answer import FinalAnswerTool | |
# from typing import Optional | |
from kaggle.api.kaggle_api_extended import KaggleApi | |
import os | |
from Gradio_UI import GradioUI | |
# Placeholder tool that performs no work yet. Replace it with something creative!
def my_custom_tool(arg1: str, arg2: int) -> str:  # the return type annotation must be specified
    # Keep the description / Args layout of the docstring below; the tool logic itself may change freely.
    """A tool that does nothing yet
    Args:
        arg1: the first argument
        arg2: the second argument
    """
    # Neither argument is consulted; the reply is a fixed placeholder string.
    placeholder_reply = "What magic will you build ?"
    return placeholder_reply
def search_kaggle_datasets(search_term:str, kaggle_username:str = None, kaggle_key:str = None, max_results:int = 10)-> list[dict[str]]:
    """Search for datasets on Kaggle based on a search term.

    Args:
        search_term: The term to search for.
        kaggle_username: Your Kaggle username.
        kaggle_key: Your Kaggle API key.
        max_results: Maximum number of results to return.

    Returns:
        A list with one dictionary per dataset (keys: title, url, size, files, last_updated). If the credentials are missing or authentication fails, a string starting with "Error" is returned instead.
    """
    # Both credentials are mandatory for this tool. Check them before the API
    # client is created or the process environment is touched.
    if not (kaggle_username and kaggle_key):
        return 'Error in searching Kaggle datasets: No username or key provided.'

    # Pass the credentials through the KAGGLE_USERNAME / KAGGLE_KEY environment
    # variables rather than writing ~/.kaggle/kaggle.json: the file approach
    # overwrote (and afterwards deleted) any credentials file the user already
    # had, and built the JSON by string interpolation.
    supplied = {"KAGGLE_USERNAME": kaggle_username, "KAGGLE_KEY": kaggle_key}
    previous = {name: os.environ.get(name) for name in supplied}
    try:
        os.environ.update(supplied)

        api = KaggleApi()
        try:
            api.authenticate()
        except Exception as e:
            return f"Error authenticating with Kaggle: {str(e)}"

        # Search, then keep at most max_results entries. A negative limit is
        # treated as zero instead of slicing from the end of the list.
        datasets = api.dataset_list(search=search_term)
        datasets = datasets[:max(max_results, 0)]

        # NOTE(review): this assumes dataset_view() accepts an entry of
        # dataset_list() and returns a mapping with the keys used below.
        # Confirm against the installed kaggle client version.
        results = []
        for dataset in datasets:
            dataset_info = api.dataset_view(dataset)
            results.append({
                'title': dataset_info['title'],
                'url': f"https://www.kaggle.com/{dataset_info['ref']}",
                'size': dataset_info['size'],
                'files': dataset_info['files'],
                'last_updated': dataset_info['lastUpdated'],
            })
        return results
    finally:
        # Always put the environment back, including on authentication
        # failure or an exception raised by the API calls.
        for name, value in previous.items():
            if value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = value
def download_kaggle_dataset(
    dataset_ref: str,
    download_path: str,
    kaggle_username: str = None,
    kaggle_key: str = None,
    unzip: bool = True
) -> str:
    """Download a dataset from Kaggle.

    Args:
        dataset_ref: The reference of the dataset (e.g., "username/dataset-name").
        download_path: The directory where the dataset will be downloaded.
        kaggle_username: Your Kaggle username (from kaggle.json).
        kaggle_key: Your Kaggle API key (from kaggle.json).
        unzip: Whether to unzip the dataset after downloading. Default is True.

    Returns:
        A confirmation message once the download call has completed, or a message starting with "Error" when the dataset reference is empty or authentication fails.
    """
    # Reject an empty reference before creating directories or calling the API.
    if not dataset_ref:
        return "Error downloading Kaggle dataset: No dataset reference provided."

    # When both credentials are given, expose them through the KAGGLE_USERNAME /
    # KAGGLE_KEY environment variables for the duration of the call. Writing
    # ~/.kaggle/kaggle.json instead would overwrite, and later delete, the
    # user's own credentials file. Otherwise the client's default
    # configuration is used unchanged.
    overrides = {}
    if kaggle_username and kaggle_key:
        overrides = {"KAGGLE_USERNAME": kaggle_username, "KAGGLE_KEY": kaggle_key}
    previous = {name: os.environ.get(name) for name in overrides}
    try:
        os.environ.update(overrides)

        api = KaggleApi()
        try:
            api.authenticate()
        except Exception as e:
            return f"Error authenticating with Kaggle: {str(e)}"

        # Ensure the download path exists
        os.makedirs(download_path, exist_ok=True)

        # Download the dataset
        api.dataset_download_files(dataset_ref, path=download_path, unzip=unzip)
        return f"Dataset '{dataset_ref}' downloaded to '{download_path}'."
    finally:
        # Restore the environment whether the download succeeded or failed.
        for name, value in previous.items():
            if value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = value
# Tool the agent uses to hand back its final answer.
final_answer = FinalAnswerTool()

# If the agent does not answer, the model is likely overloaded. Use another model or the following
# Hugging Face Endpoint, which also serves Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # this model may be overloaded at times
    custom_role_conversions=None,
)

# Import tool from Hub
# NOTE(review): this tool is loaded but is not included in the agent's tools list below.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Read the prompt templates explicitly as UTF-8 so the result does not depend on the
# platform's default locale encoding.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

# NOTE(review): no @tool decorator is visible on search_kaggle_datasets / download_kaggle_dataset
# in this view of the file. Confirm they are registered as smolagents tools before listing them here.
agent = CodeAgent(
    model=model,
    tools=[final_answer, search_kaggle_datasets, download_kaggle_dataset],  # add your tools here (don't remove final answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
    additional_authorized_imports=['pandas', 'matplotlib', 'seaborn'],
)

# Start the Gradio web UI for the agent.
GradioUI(agent).launch()