Spaces:
Sleeping
Sleeping
from smolagents import CodeAgent,ToolCallingAgent,HfApiModel,load_tool,tool | |
import yaml | |
from tools.final_answer import FinalAnswerTool | |
from tools.user_input import UserInputTool | |
from kaggle.api.kaggle_api_extended import KaggleApi | |
import os | |
from Gradio_UI import GradioUI | |
# Make sure the Kaggle credential variables exist, but never overwrite values
# that are already present in the environment (for example deployment
# secrets): replacing them with empty strings would break authentication.
os.environ.setdefault('KAGGLE_USERNAME', '')
os.environ.setdefault('KAGGLE_KEY', '')
def auth_kaggle() -> KaggleApi:
    """Authenticate with Kaggle and return the API object.

    Returns:
        The `KaggleApi` instance on which `authenticate()` succeeded.

    Raises:
        RuntimeError: If authentication fails. The underlying exception is
            chained as the cause.
    """
    api = KaggleApi()
    try:
        api.authenticate()
    except Exception as e:
        # Raise instead of returning the message: callers invoke API methods
        # on the result right away, so a returned string would only surface
        # later as an unrelated AttributeError.
        raise RuntimeError(f"Error authenticating with Kaggle: {e}") from e
    return api
def search_kaggle_datasets(search_term: str,
                           max_results: int = 10
                           ) -> list[dict]:
    """Search for datasets on Kaggle based on a search term and return a list of dataset metadata.

    Args:
        search_term: The term to search for.
        max_results: Maximum number of results to return.

    Returns:
        One dict per dataset with the keys 'title', 'url', 'size', 'files' and 'last_updated'.
    """
    kaggle_api = auth_kaggle()

    # Search for datasets; treat a missing result list as "no matches".
    datasets = kaggle_api.dataset_list(search=search_term) or []

    # Limit the number of results. The limit is clamped at zero so that a
    # negative value yields no results instead of slicing from the end.
    datasets = datasets[:max(max_results, 0)]

    # Extract relevant information
    results = []
    for dataset in datasets:
        # NOTE(review): this assumes `dataset_view` accepts an entry of the
        # search result and returns a mapping with these keys -- confirm
        # against the installed kaggle client version.
        dataset_info = kaggle_api.dataset_view(dataset)
        results.append({
            'title': dataset_info['title'],
            'url': f"https://www.kaggle.com/{dataset_info['ref']}",
            'size': dataset_info['size'],
            'files': dataset_info['files'],
            'last_updated': dataset_info['lastUpdated'],
        })
    return results
def download_kaggle_dataset(
    dataset_ref: str,
    download_path: str,
    unzip: bool = True
) -> str:
    """Download a dataset from Kaggle into a local directory.

    Args:
        dataset_ref: The reference of the dataset (e.g., "username/dataset-name").
        download_path: The directory where the dataset will be downloaded.
        unzip: Whether to unzip the dataset after downloading. Default is True.

    Returns:
        A confirmation message naming the dataset and the download directory.
    """
    # Create the target directory up front; an existing directory is fine.
    os.makedirs(download_path, exist_ok=True)

    # Authenticate, then fetch the dataset files into the target directory.
    client = auth_kaggle()
    client.dataset_download_files(dataset_ref, path=download_path, unzip=unzip)

    confirmation = f"Dataset '{dataset_ref}' downloaded to '{download_path}'."
    return confirmation
# Tool instances handed to the agents created further down in this script.
final_answer = FinalAnswerTool()
user_input = UserInputTool()

# If the agent does not answer, the model is probably overloaded. Use another
# model, or the following Hugging Face endpoint, which also serves Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # this model may be overloaded at times
    max_tokens=2096,
    temperature=0.5,
    custom_role_conversions=None,
)

# Text-to-image tool loaded from the Hub.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
# Load the prompt templates passed to the agent. The encoding is pinned to
# UTF-8 so the file is decoded the same way regardless of the platform's
# default locale encoding.
with open("prompts.yaml", "r", encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)
# Sub-agent whose only tool is the user-input tool; it is registered as a
# managed agent of the main agent under the name 'ask_question'.
conversional_agent = ToolCallingAgent(
    name='ask_question',
    description='Ask a question to the user and get the answer',
    tools=[user_input],
    model=model,
    max_steps=6,
)
# The Kaggle helpers are plain functions. smolagents' `tool` turns a
# type-hinted, documented function into a Tool object, which is what the
# agent's `tools` list is expected to contain (see the smolagents tools
# documentation), so they are wrapped here before being registered.
search_kaggle_datasets_tool = tool(search_kaggle_datasets)
download_kaggle_dataset_tool = tool(download_kaggle_dataset)

# Main code-writing agent: Kaggle search/download, user input, image
# generation and the final-answer tool, plus the managed question-asking
# sub-agent.
agent = CodeAgent(
    model=model,
    tools=[
        final_answer,
        search_kaggle_datasets_tool,
        user_input,
        download_kaggle_dataset_tool,
        image_generation_tool,
    ],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=2,
    name=None,
    description=None,
    managed_agents=[conversional_agent],
    prompt_templates=prompt_templates,
    # Extra modules the generated code is allowed to import.
    additional_authorized_imports=[
        'pandas',
        'matplotlib',
        'seaborn',
    ],
    add_base_tools=True,
)

# Start the Gradio web UI for the agent.
GradioUI(agent).launch()