First_agent_template

Sleeping

App Files Files Community

First_agent_template / app.py

lokami

Add search_kaggle_datasets, download_kaggle_dataset and authorize the use of datasci libs

156c068 2 months ago

raw

history blame

5.69 kB

	from smolagents import CodeAgent,DuckDuckGoSearchTool,HfApiModel,load_tool,tool
	import datetime
	import requests
	import pytz
	import yaml
	from tools.final_answer import FinalAnswerTool
	# from typing import Optional

	from kaggle.api.kaggle_api_extended import KaggleApi
	import os

	from Gradio_UI import GradioUI

	# Below is an example of a tool that does nothing. Amaze us with your creativity !
	@tool
	def my_custom_tool(arg1:str, arg2:int)-> str: # it's important to specify the return type
	    # Keep this format for the description / args / args description but feel free to modify the tool
	    """A tool that does nothing yet
	    Args:
	        arg1: the first argument
	        arg2: the second argument
	    """
	    # Placeholder implementation: neither argument is used and the reply is
	    # always the same fixed string.
	    placeholder_reply = "What magic will you build ?"
	    return placeholder_reply

	@tool
	def search_kaggle_datasets(search_term:str, kaggle_username:str = None, kaggle_key:str = None, max_results:int = 10)-> list[dict[str]]:
	    """Search for datasets on Kaggle based on a search term.
	    Args:
	        search_term: The term to search for.
	        kaggle_username: Your Kaggle username.
	        kaggle_key: Your Kaggle API key.
	        max_results: Maximum number of results to return.
	    """
	    # Result: a list of dicts with the keys 'title', 'url', 'size', 'files'
	    # and 'last_updated'. When credentials are missing or authentication
	    # fails, an error message string is returned instead. Errors raised by
	    # the search itself propagate to the caller.
	    # NOTE(review): the signature and return annotation are kept verbatim;
	    # presumably the @tool decorator derives the tool schema from them, so
	    # confirm before tightening `list[dict[str]]`.
	    import json  # function-local so no new file-level import is required

	    # Both credentials are mandatory for this tool; there is no fallback to a
	    # pre-existing kaggle.json here.
	    if not (kaggle_username and kaggle_key):
	        return 'Error in searching Kaggle datasets: No username or key provided.'

	    def _field(record, *names):
	        # Return the first non-None value stored under any of `names`,
	        # whether `record` is a mapping or an attribute-style object.
	        for name in names:
	            if isinstance(record, dict):
	                value = record.get(name)
	            else:
	                value = getattr(record, name, None)
	            if value is not None:
	                return value
	        return None

	    # Initialize the Kaggle API
	    api = KaggleApi()

	    kaggle_json_path = os.path.expanduser("~/.kaggle/kaggle.json")
	    os.makedirs(os.path.dirname(kaggle_json_path), exist_ok=True)

	    # Remember any credentials file that is already there so it can be put
	    # back afterwards instead of being overwritten and deleted.
	    previous_credentials = None
	    if os.path.exists(kaggle_json_path):
	        with open(kaggle_json_path, "rb") as existing_file:
	            previous_credentials = existing_file.read()

	    try:
	        # Create the temporary kaggle.json with owner-only permissions from
	        # the start, and let json.dump handle quoting/escaping of the values.
	        fd = os.open(kaggle_json_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
	        with os.fdopen(fd, "w") as f:
	            json.dump({"username": kaggle_username, "key": kaggle_key}, f)
	        # The mode passed to os.open only applies to newly created files.
	        os.chmod(kaggle_json_path, 0o600)

	        try:
	            api.authenticate()
	        except Exception as e:
	            return f"Error authenticating with Kaggle: {str(e)}"

	        # Search for datasets
	        datasets = api.dataset_list(search=search_term) or []

	        # Limit the number of results; a negative limit would otherwise slice
	        # from the end of the list rather than yield nothing.
	        limit = None if max_results is None else max(max_results, 0)
	        datasets = datasets[:limit]

	        # Extract relevant information.
	        # NOTE(review): the previous code passed every hit to
	        # `api.dataset_view(...)` and indexed the result like a dict. The
	        # fields are now read straight from the `dataset_list` entries, trying
	        # both camelCase and snake_case spellings -- confirm the names against
	        # the installed kaggle client. Missing fields come back as None.
	        results = []
	        for dataset in datasets:
	            results.append({
	                'title': _field(dataset, 'title'),
	                'url': f"https://www.kaggle.com/{_field(dataset, 'ref')}",
	                'size': _field(dataset, 'size', 'total_bytes', 'totalBytes'),
	                'files': _field(dataset, 'files'),
	                'last_updated': _field(dataset, 'lastUpdated', 'last_updated'),
	            })

	        return results
	    finally:
	        # Always undo the temporary credentials, including on authentication
	        # failure or when the search raises.
	        if previous_credentials is not None:
	            with open(kaggle_json_path, "wb") as restored_file:
	                restored_file.write(previous_credentials)
	        elif os.path.exists(kaggle_json_path):
	            os.remove(kaggle_json_path)

	@tool
	def download_kaggle_dataset(
	    dataset_ref: str,
	    download_path: str,
	    kaggle_username: str = None,
	    kaggle_key: str = None,
	    unzip: bool = True
	) -> str:
	    """Download a dataset from Kaggle.
	    Args:
	        dataset_ref: The reference of the dataset (e.g., "username/dataset-name").
	        download_path: The directory where the dataset will be downloaded.
	        kaggle_username: Your Kaggle username (from kaggle.json).
	        kaggle_key: Your Kaggle API key (from kaggle.json).
	        unzip: Whether to unzip the dataset after downloading. Default is True.
	    """
	    # Result: a confirmation message on success, or an error message string
	    # when authentication fails. Errors raised by the download itself
	    # propagate to the caller.
	    import json  # function-local so no new file-level import is required

	    # Initialize the Kaggle API
	    api = KaggleApi()

	    # A temporary kaggle.json is only written when BOTH credentials are
	    # given; otherwise authenticate() is left to use whatever default
	    # configuration is already available.
	    use_temporary_credentials = bool(kaggle_username and kaggle_key)
	    kaggle_json_path = os.path.expanduser("~/.kaggle/kaggle.json")

	    # Remember any credentials file that is already there so it can be put
	    # back afterwards instead of being overwritten and deleted.
	    previous_credentials = None
	    if use_temporary_credentials:
	        os.makedirs(os.path.dirname(kaggle_json_path), exist_ok=True)
	        if os.path.exists(kaggle_json_path):
	            with open(kaggle_json_path, "rb") as existing_file:
	                previous_credentials = existing_file.read()

	    try:
	        if use_temporary_credentials:
	            # Create the file with owner-only permissions from the start, and
	            # let json.dump handle quoting/escaping of the values.
	            fd = os.open(kaggle_json_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
	            with os.fdopen(fd, "w") as f:
	                json.dump({"username": kaggle_username, "key": kaggle_key}, f)
	            # The mode passed to os.open only applies to newly created files.
	            os.chmod(kaggle_json_path, 0o600)

	        try:
	            api.authenticate()
	        except Exception as e:
	            return f"Error authenticating with Kaggle: {str(e)}"

	        # Ensure the download path exists
	        os.makedirs(download_path, exist_ok=True)

	        # Download the dataset
	        api.dataset_download_files(dataset_ref, path=download_path, unzip=unzip)

	        return f"Dataset '{dataset_ref}' downloaded to '{download_path}'."
	    finally:
	        # Always undo the temporary credentials, including on authentication
	        # failure or when the download raises.
	        if use_temporary_credentials:
	            if previous_credentials is not None:
	                with open(kaggle_json_path, "wb") as restored_file:
	                    restored_file.write(previous_credentials)
	            elif os.path.exists(kaggle_json_path):
	                os.remove(kaggle_json_path)

	# Tool instance that is always kept in the agent's tool list below.
	final_answer = FinalAnswerTool()

	# If the agent stops answering, the model below may be overloaded. In that case
	# switch to another model, or to this Hugging Face Endpoint that also contains
	# qwen2.5 coder:
	# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
	model = HfApiModel(
	    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",  # this model may be overloaded at times
	    temperature=0.5,
	    max_tokens=2096,
	    custom_role_conversions=None,
	)

	# Import tool from Hub. It is loaded here but not passed to the agent below.
	image_generation_tool = load_tool(
	    "agents-course/text-to-image",
	    trust_remote_code=True,
	)

	# Prompt templates come from the YAML file next to this script.
	with open("prompts.yaml", mode="r") as stream:
	    prompt_templates = yaml.safe_load(stream)

	# Add your tools here (don't remove final_answer).
	agent_tools = [final_answer, search_kaggle_datasets, download_kaggle_dataset]

	agent = CodeAgent(
	    model=model,
	    tools=agent_tools,
	    prompt_templates=prompt_templates,
	    additional_authorized_imports=["pandas", "matplotlib", "seaborn"],
	    max_steps=6,
	    verbosity_level=1,
	    planning_interval=None,
	    grammar=None,
	    name=None,
	    description=None,
	)

	# Start the Gradio front end for the agent.
	ui = GradioUI(agent)
	ui.launch()