File size: 5,691 Bytes
a7dc99b
9b5b26a
 
 
c19d193
6aae614
156c068
8fe992b
a7dc99b
156c068
a7dc99b
9b5b26a
 
5df72d6
9b5b26a
3d1237b
9b5b26a
 
 
 
 
 
 
 
a7dc99b
156c068
a7dc99b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156c068
 
 
 
 
a7dc99b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b5b26a
156c068
 
 
 
 
 
 
 
9b5b26a
156c068
 
 
 
 
9b5b26a
156c068
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b5b26a
156c068
9b5b26a
156c068
 
 
 
 
 
 
 
 
 
 
8c01ffb
156c068
8c01ffb
6aae614
ae7a494
 
 
 
e121372
bf6d34c
 
29ec968
fe328e0
13d500a
8c01ffb
 
9b5b26a
 
8c01ffb
861422e
 
9b5b26a
8c01ffb
8fe992b
156c068
8c01ffb
 
 
 
 
 
156c068
 
8fe992b
 
9b5b26a
8c01ffb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
from smolagents import CodeAgent,DuckDuckGoSearchTool,HfApiModel,load_tool,tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
# from typing import Optional

from kaggle.api.kaggle_api_extended import KaggleApi
import os

from Gradio_UI import GradioUI

# Below is an example of a tool that does nothing. Amaze us with your creativity!
@tool
def my_custom_tool(arg1:str, arg2:int)-> str: # it's important to specify the return type
    # Template tool: keep this docstring layout (summary line, then an "Args:" section
    # with one entry per parameter), but feel free to change what the tool does.
    """A tool that does nothing yet 
    Args:
        arg1: the first argument
        arg2: the second argument
    """
    # Neither argument is used yet; the tool always answers with the same placeholder.
    placeholder_reply = "What magic will you build ?"
    return placeholder_reply

def _kaggle_record_field(record, *names):
    """Return the first of `names` present on `record`, looked up by key or attribute.

    Works for both mapping-style and object-style records; returns None when
    none of the candidate names is available.
    """
    for name in names:
        if isinstance(record, dict):
            if name in record:
                return record[name]
        elif hasattr(record, name):
            return getattr(record, name)
    return None


@tool
def search_kaggle_datasets(search_term:str, kaggle_username:str = None, kaggle_key:str = None, max_results:int = 10)-> list[dict[str]]:
    """Search for datasets on Kaggle based on a search term.
    Args:
        search_term: The term to search for.
        kaggle_username: Your Kaggle username.
        kaggle_key: Your Kaggle API key.
        max_results: Maximum number of results to return.
    Returns:
        A list with one dict per dataset (keys 'title', 'url', 'size', 'files'
        and 'last_updated'), or an error message string when the credentials
        are missing or authentication fails.
    """
    import json  # function-scope import: only needed to serialise the credentials file

    # Both credentials are mandatory for this tool; refuse before touching the filesystem.
    if not (kaggle_username and kaggle_key):
        return 'Error in searching Kaggle datasets: No username or key provided.'

    # Initialize the Kaggle API
    api = KaggleApi()

    kaggle_json_path = os.path.expanduser("~/.kaggle/kaggle.json")
    backup_path = kaggle_json_path + ".agent-backup"

    # Set aside a credentials file that already exists, so that it is neither
    # overwritten for good nor deleted by the clean-up in the `finally` below.
    had_existing_file = os.path.exists(kaggle_json_path)
    os.makedirs(os.path.dirname(kaggle_json_path), exist_ok=True)
    if had_existing_file:
        os.replace(kaggle_json_path, backup_path)

    try:
        # Write the temporary kaggle.json; json.dump escapes quotes/backslashes
        # that a hand-built JSON string would not.
        with open(kaggle_json_path, "w") as f:
            json.dump({"username": kaggle_username, "key": kaggle_key}, f)
        os.chmod(kaggle_json_path, 0o600)  # read/write for the owner only

        try:
            api.authenticate()
        except Exception as e:
            return f"Error authenticating with Kaggle: {str(e)}"

        # Search for datasets and cap the number of results; a negative
        # max_results is treated as zero instead of slicing from the end.
        datasets = api.dataset_list(search=search_term) or []
        datasets = datasets[:max(max_results, 0)]

        # Read the fields straight from the listing entries instead of issuing
        # one extra detail request per result.
        # NOTE(review): field names on the listing entries appear to differ
        # between kaggle client releases (camelCase vs snake_case), hence the
        # fallbacks; fields the installed client does not expose come back as None.
        results = []
        for dataset in datasets:
            ref = _kaggle_record_field(dataset, 'ref')
            results.append({
                'title': _kaggle_record_field(dataset, 'title'),
                'url': f"https://www.kaggle.com/datasets/{ref}" if ref else None,
                'size': _kaggle_record_field(dataset, 'size', 'total_bytes', 'totalBytes'),
                'files': _kaggle_record_field(dataset, 'files'),
                'last_updated': _kaggle_record_field(dataset, 'lastUpdated', 'last_updated'),
            })

        return results
    finally:
        # Always undo the credentials swap, including on early return or error.
        if had_existing_file:
            os.replace(backup_path, kaggle_json_path)
        elif os.path.exists(kaggle_json_path):
            os.remove(kaggle_json_path)

@tool
def download_kaggle_dataset(
    dataset_ref: str,
    download_path: str,
    kaggle_username: str = None,
    kaggle_key: str = None,
    unzip: bool = True
) -> str:
    """Download a dataset from Kaggle.
    Args:
        dataset_ref: The reference of the dataset (e.g., "username/dataset-name").
        download_path: The directory where the dataset will be downloaded.
        kaggle_username: Your Kaggle username (from kaggle.json).
        kaggle_key: Your Kaggle API key (from kaggle.json).
        unzip: Whether to unzip the dataset after downloading. Default is True.
    Returns:
        A confirmation message naming the dataset and the download directory,
        or an error message string when authentication fails.
    """
    import json  # function-scope import: only needed to serialise the credentials file

    # Initialize the Kaggle API
    api = KaggleApi()

    # A temporary kaggle.json is only written when BOTH credentials are given;
    # otherwise whatever default configuration the Kaggle client finds is used.
    use_temporary_credentials = bool(kaggle_username and kaggle_key)
    kaggle_json_path = os.path.expanduser("~/.kaggle/kaggle.json")
    backup_path = kaggle_json_path + ".agent-backup"
    had_existing_file = False

    if use_temporary_credentials:
        # Set aside a credentials file that already exists, so that it is neither
        # overwritten for good nor deleted by the clean-up in the `finally` below.
        had_existing_file = os.path.exists(kaggle_json_path)
        os.makedirs(os.path.dirname(kaggle_json_path), exist_ok=True)
        if had_existing_file:
            os.replace(kaggle_json_path, backup_path)

    try:
        if use_temporary_credentials:
            # json.dump escapes quotes/backslashes that a hand-built JSON string would not.
            with open(kaggle_json_path, "w") as f:
                json.dump({"username": kaggle_username, "key": kaggle_key}, f)
            os.chmod(kaggle_json_path, 0o600)  # read/write for the owner only

        try:
            api.authenticate()
        except Exception as e:
            return f"Error authenticating with Kaggle: {str(e)}"

        # Ensure the download path exists
        os.makedirs(download_path, exist_ok=True)

        # Download the dataset; errors raised here propagate to the caller,
        # but only after the credentials clean-up below has run.
        api.dataset_download_files(dataset_ref, path=download_path, unzip=unzip)

        return f"Dataset '{dataset_ref}' downloaded to '{download_path}'."
    finally:
        # Always undo the credentials swap, including on early return or error.
        if use_temporary_credentials:
            if had_existing_file:
                os.replace(backup_path, kaggle_json_path)
            elif os.path.exists(kaggle_json_path):
                os.remove(kaggle_json_path)

# Tool instance registered with the agent below (the template asks not to remove it).
final_answer = FinalAnswerTool()

# If the agent does not answer, the model is probably overloaded. In that case use another
# model, or the following Hugging Face endpoint, which also serves Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'

model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # it is possible that this model may be overloaded
    custom_role_conversions=None,
)


# Import tool from Hub
# NOTE: this tool is loaded but not passed to the agent's `tools` list below; add it
# there to make it available. trust_remote_code=True lets code from the Hub repository
# run locally, so only use it with a source you trust.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Load the prompt templates handed to the agent. The encoding is explicit so the file
# is read the same way regardless of the platform's default encoding.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    tools=[final_answer, search_kaggle_datasets, download_kaggle_dataset], ## add your tools here (don't remove final answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
    additional_authorized_imports=['pandas', 'matplotlib', 'seaborn'],
)


# Start the Gradio web UI for the agent (runs as soon as this module is executed).
GradioUI(agent).launch()