lokami's picture
Add search_kaggle_datasets, download_kaggle_dataset and authorize the use of datasci libs
156c068
raw
history blame
5.69 kB
from smolagents import CodeAgent,DuckDuckGoSearchTool,HfApiModel,load_tool,tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
# from typing import Optional
from kaggle.api.kaggle_api_extended import KaggleApi
import os
from Gradio_UI import GradioUI
# Below is an example of a tool that does nothing. Amaze us with your creativity!
@tool
def my_custom_tool(arg1: str, arg2: int) -> str:  # it's important to specify the return type
    # The docstring layout (description, then an "Args:" section with one
    # entry per argument) must be kept; the tool itself is free to change.
    """A tool that does nothing yet
    Args:
        arg1: the first argument
        arg2: the second argument
    """
    # Placeholder behaviour: both arguments are ignored and a fixed message
    # is handed back.
    placeholder_reply = "What magic will you build ?"
    return placeholder_reply
@tool
def search_kaggle_datasets(
    search_term: str,
    kaggle_username: str = None,
    kaggle_key: str = None,
    max_results: int = 10,
) -> list[dict[str]]:
    """Search for datasets on Kaggle based on a search term.

    The given credentials are written to ~/.kaggle/kaggle.json only for the
    duration of the call. A file that already exists at that location is moved
    aside first and put back afterwards, even when the search fails.

    Args:
        search_term: The term to search for.
        kaggle_username: Your Kaggle username.
        kaggle_key: Your Kaggle API key.
        max_results: Maximum number of results to return.

    Returns:
        A list of dicts with the keys 'title', 'url', 'size', 'files' and 'last_updated'. When the credentials are missing or authentication fails, an error message string is returned instead.
    """
    # Imported locally so this block does not depend on a new file-level import.
    import json

    # Initialize the Kaggle API
    api = KaggleApi()

    # Both credentials are mandatory for this tool.
    # NOTE(review): the error paths return a str although the annotation
    # promises a list; kept as-is because callers may rely on the message.
    if not (kaggle_username and kaggle_key):
        return 'Error in searching Kaggle datasets: No username or key provided.'

    kaggle_json_path = os.path.expanduser("~/.kaggle/kaggle.json")
    os.makedirs(os.path.dirname(kaggle_json_path), exist_ok=True)

    # Move a pre-existing credentials file aside instead of overwriting it, so
    # the user's own configuration is not destroyed by this call.
    backup_path = None
    if os.path.exists(kaggle_json_path):
        backup_path = f"{kaggle_json_path}.{os.getpid()}.bak"
        os.replace(kaggle_json_path, backup_path)

    try:
        # Create the file with owner-only permissions from the start (no
        # window in which the key is readable by others) and let json do the
        # escaping so quotes or backslashes in the values cannot break it.
        fd = os.open(kaggle_json_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as f:
            json.dump({"username": kaggle_username, "key": kaggle_key}, f)

        # Presumably authenticate() picks up the file written above.
        try:
            api.authenticate()
        except Exception as e:
            return f"Error authenticating with Kaggle: {str(e)}"

        # Search for datasets and limit the number of results
        datasets = api.dataset_list(search=search_term)
        datasets = datasets[:max_results]

        # Extract relevant information. Errors raised by the API calls are not
        # caught here and propagate to the caller.
        # NOTE(review): dataset_view() is called with the list item and its
        # result is indexed like a dict -- confirm both against the installed
        # kaggle package version.
        results = []
        for dataset in datasets:
            dataset_info = api.dataset_view(dataset)
            results.append({
                'title': dataset_info['title'],
                'url': f"https://www.kaggle.com/{dataset_info['ref']}",
                'size': dataset_info['size'],
                'files': dataset_info['files'],
                'last_updated': dataset_info['lastUpdated'],
            })
        return results
    finally:
        # Always remove the temporary credentials, on success and on failure.
        if backup_path is not None:
            # Replacing restores the original file and drops the temporary one.
            os.replace(backup_path, kaggle_json_path)
        elif os.path.exists(kaggle_json_path):
            os.remove(kaggle_json_path)
@tool
def download_kaggle_dataset(
    dataset_ref: str,
    download_path: str,
    kaggle_username: str = None,
    kaggle_key: str = None,
    unzip: bool = True
) -> str:
    """Download a dataset from Kaggle.

    When both credentials are given they are written to ~/.kaggle/kaggle.json
    only for the duration of the call; a file that already exists there is
    moved aside first and put back afterwards, even when the download fails.
    Without credentials the existing Kaggle configuration is used unchanged.

    Args:
        dataset_ref: The reference of the dataset (e.g., "username/dataset-name").
        download_path: The directory where the dataset will be downloaded.
        kaggle_username: Your Kaggle username (from kaggle.json).
        kaggle_key: Your Kaggle API key (from kaggle.json).
        unzip: Whether to unzip the dataset after downloading. Default is True.

    Returns:
        A confirmation message naming the dataset and the download directory, or an error message when authentication fails.
    """
    # Imported locally so this block does not depend on a new file-level import.
    import json

    # Initialize the Kaggle API
    api = KaggleApi()

    kaggle_json_path = os.path.expanduser("~/.kaggle/kaggle.json")
    use_temp_credentials = bool(kaggle_username and kaggle_key)
    backup_path = None

    if use_temp_credentials:
        os.makedirs(os.path.dirname(kaggle_json_path), exist_ok=True)
        # Move a pre-existing credentials file aside instead of overwriting
        # it, so the user's own configuration is not destroyed by this call.
        if os.path.exists(kaggle_json_path):
            backup_path = f"{kaggle_json_path}.{os.getpid()}.bak"
            os.replace(kaggle_json_path, backup_path)

    try:
        if use_temp_credentials:
            # Create the file with owner-only permissions from the start and
            # let json do the escaping so quotes or backslashes in the values
            # cannot produce an invalid file.
            fd = os.open(kaggle_json_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
            with os.fdopen(fd, "w") as f:
                json.dump({"username": kaggle_username, "key": kaggle_key}, f)

        # Without explicit credentials the default configuration is used.
        try:
            api.authenticate()
        except Exception as e:
            return f"Error authenticating with Kaggle: {str(e)}"

        # Ensure the download path exists
        os.makedirs(download_path, exist_ok=True)

        # Download the dataset; errors raised here are not caught and
        # propagate to the caller.
        api.dataset_download_files(dataset_ref, path=download_path, unzip=unzip)

        return f"Dataset '{dataset_ref}' downloaded to '{download_path}'."
    finally:
        # Always remove the temporary credentials, on success and on failure.
        if use_temp_credentials:
            if backup_path is not None:
                # Replacing restores the original file and drops the temporary one.
                os.replace(backup_path, kaggle_json_path)
            elif os.path.exists(kaggle_json_path):
                os.remove(kaggle_json_path)
# Tool the agent uses to hand back its final answer.
final_answer = FinalAnswerTool()

# If the agent does not answer, the model is probably overloaded; use another
# model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # it is possible that this model may be overloaded
    custom_role_conversions=None,
)

# Import tool from Hub
# NOTE(review): image_generation_tool is loaded here but is not included in
# the agent's tool list below -- confirm whether that is intended.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Read the prompt templates. The encoding is given explicitly so that decoding
# does not depend on the platform's locale default.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    tools=[final_answer, search_kaggle_datasets, download_kaggle_dataset],  # add your tools here (don't remove final answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
    # Extra modules the agent's generated code is allowed to import.
    additional_authorized_imports=['pandas', 'matplotlib', 'seaborn'],
)

# Start the Gradio web interface for the agent.
GradioUI(agent).launch()