from smolagents import CodeAgent, ToolCallingAgent, HfApiModel, load_tool, tool
import yaml
from tools.final_answer import FinalAnswerTool
from tools.user_input import UserInputTool

from kaggle.api.kaggle_api_extended import KaggleApi
import os

from Gradio_UI import GradioUI

os.environ['KAGGLE_USERNAME'] = ''
os.environ['KAGGLE_KEY'] = ''
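# Fill in your Kaggle credentials above. Alternatively, remove those two lines and
# place a kaggle.json file under ~/.kaggle/ ; the kaggle client reads credentials
# from there by default.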

def auth_kaggle() -> KaggleApi:
    """Authenticate with Kaggle and return the API client."""
    api = KaggleApi()
    try:
        api.authenticate()
    except Exception as e:
        # Raise instead of returning an error string, so callers never receive
        # a str where a KaggleApi client is expected.
        raise RuntimeError(f"Error authenticating with Kaggle: {e}") from e
    return api

@tool
def search_kaggle_datasets(search_term: str,
                           max_results: int = 10
                           ) -> list[dict]:
    """Search for datasets on Kaggle based on a search term and return a list of dataset metadata dicts.
    Args:
        search_term: The term to search for.
        max_results: Maximum number of results to return.
    """
    kaggle_api = auth_kaggle()

    # Search for datasets and limit the number of results
    datasets = kaggle_api.dataset_list(search=search_term)[:max_results]

    # Extract relevant information from the listing objects returned by dataset_list;
    # getattr with a default keeps the tool robust if a field is absent on a result.
    results = []
    for dataset in datasets:
        results.append({
            'title': getattr(dataset, 'title', None),
            'ref': getattr(dataset, 'ref', None),
            'url': f"https://www.kaggle.com/{getattr(dataset, 'ref', '')}",
            'size': getattr(dataset, 'size', None),
            'last_updated': getattr(dataset, 'lastUpdated', None),
        })
    return results

@tool
def download_kaggle_dataset(
    dataset_ref: str,
    download_path: str,
    unzip: bool = True
) -> str:
    """Download a dataset from Kaggle.
    Args:
        dataset_ref: The reference of the dataset (e.g., "username/dataset-name").
        download_path: The directory where the dataset will be downloaded.
        unzip: Whether to unzip the dataset after downloading. Default is True.
    """
    # Ensure the download path exists
    os.makedirs(download_path, exist_ok=True)

    kaggle_api = auth_kaggle()

    # Download the dataset
    kaggle_api.dataset_download_files(dataset_ref, path=download_path, unzip=unzip)

    return f"Dataset '{dataset_ref}' downloaded to '{download_path}'."

final_answer = FinalAnswerTool()
user_input = UserInputTool()

# If the agent does not answer, the model is overloaded. Use another model, or the
# following Hugging Face endpoint, which also serves Qwen2.5-Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
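# A minimal sketch of that fallback, assuming the endpoint accepts the same
# HfApiModel arguments (swap it in for `model` below if needed):
# model = HfApiModel(
#     max_tokens=2096,
#     temperature=0.5,
#     model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',
#     custom_role_conversions=None,
# )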

model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # this model may be overloaded at times
    custom_role_conversions=None,
)

# Import tool from Hub
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

conversational_agent = ToolCallingAgent(
    model=model,
    tools=[user_input],
    max_steps=6,
    name='ask_question',
    description='Ask a question to the user and get the answer',
)

agent = CodeAgent(
    model=model,
    tools=[final_answer,
           search_kaggle_datasets, 
           user_input,
           download_kaggle_dataset,
           image_generation_tool],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=2,
    name=None,
    description=None,
    managed_agents=[conversational_agent],
    prompt_templates=prompt_templates,
    additional_authorized_imports=['pandas', 
                                   'matplotlib', 
                                   'seaborn'],
    add_base_tools=True,
)


GradioUI(agent).launch()