Vivek-tiwari committed on
Commit
75d92c7
·
verified ·
1 Parent(s): 6880ce9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import gradio as gr
4
+ from transformers import ReactCodeAgent, HfEngine, Tool
5
+ import pandas as pd
6
+
7
+ from gradio import Chatbot
8
+ from transformers.agents import stream_to_gradio
9
+ from huggingface_hub import login
10
+ from gradio.data_classes import FileData
11
+
12
# Authenticate against the Hugging Face Hub only when a token is configured.
# Calling login() without a token falls back to an interactive prompt, which
# cannot be answered when the app runs without a terminal.
_hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if _hf_token:
    login(_hf_token)

# Engine for the hosted model that the agent queries.
llm_engine = HfEngine("mistralai/Mistral-Nemo-Instruct-2407")

# Code-writing agent: no extra tools, a whitelist of data-analysis imports it
# may use in the code it generates, and at most 10 iterations per task.
agent = ReactCodeAgent(
    tools=[],
    llm_engine=llm_engine,
    additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "scipy.stats"],
    max_iterations=10,
)

# Task template sent to the agent. The single placeholder ``{structure_notes}``
# is filled with a textual summary of the uploaded dataframe via str.format().
base_prompt = """<task>You are an expert data analyst.
According to the features you have and the data structure given below, determine which feature should be the target.
Then list 5 interesting questions that could be asked on this data, for instance about specific correlations with target variable.
Then answer these questions one by one, by finding the relevant numbers.
<important>Meanwhile, plot some figures using matplotlib/seaborn and save them to the (already existing) folder './figures/': take care to clear each figure with plt.clf() before doing another plot.
In your final answer: summarize these correlations and trends
After each number derive real worlds insights, for instance: "Correlation between is_december and boredness is 1.3453, which suggest people are more bored in winter".
<important>Your final answer should be a long string with at least 3 numbered, detailed parts and a statement of explaining why you chose that as an answer.
Structure of the data:
{structure_notes}
<important>The data file is passed to you as the variable data_file, it is a pandas dataframe, you can use it directly.
<important>DO NOT try to load data_file, it is already a dataframe pre-loaded in your python interpreter!
"""
36
+
37
+
38
def get_images_in_directory(directory):
    """Return the paths of all image files found under ``directory``.

    The directory tree is walked recursively. A file counts as an image when
    its extension, compared case-insensitively, is one of a fixed set of
    common raster formats.

    Args:
        directory: Root folder to search.

    Returns:
        A sorted list of paths, each built by joining the walked folder with
        the file name. Sorting keeps the result independent of the order in
        which the filesystem happens to enumerate entries. The list is empty
        when nothing matches.
    """
    image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff'}

    return sorted(
        os.path.join(root, file_name)
        for root, _dirs, files in os.walk(directory)
        for file_name in files
        if os.path.splitext(file_name)[1].lower() in image_extensions
    )
47
+
48
def interact_with_agent(file_input, prompt):
    """Run the data-analyst agent on an uploaded CSV and stream the chat.

    This is a generator: each yielded value is the full list of chat messages
    to display so far, so the UI can refresh while the agent is working.

    Args:
        file_input: The CSV file to analyze; passed to ``pd.read_csv``.
        prompt: Optional free-text notes about the data. When non-empty they
            are appended to the task prompt sent to the agent.

    Yields:
        Lists of ``gr.ChatMessage``. Intermediate lists end with a temporary
        "processing" placeholder; the last yielded list does not.

    Raises:
        gr.Error: If no file was provided.
    """
    import mimetypes

    if file_input is None:
        raise gr.Error("Please upload a .csv file to analyze.")

    # Start every run from an empty figures folder so that only plots made
    # during this run are displayed. The folder may not exist on first use.
    try:
        shutil.rmtree("./figures")
    except FileNotFoundError:
        pass
    os.makedirs("./figures", exist_ok=True)

    data_file = pd.read_csv(file_input)
    data_structure_notes = f"""- Description (output of .describe()):
{data_file.describe()}
- Columns with dtypes:
{data_file.dtypes}"""

    task_prompt = base_prompt.format(structure_notes=data_structure_notes)
    # Forward the user's notes instead of silently discarding them.
    if prompt and prompt.strip():
        task_prompt += "\nAdditional notes on the data:\n" + prompt.strip()

    messages = [gr.ChatMessage(role="user", content=task_prompt)]
    yield messages + [
        gr.ChatMessage(role="assistant", content="⏳ _Starting task..._")
    ]

    # Paths already added to the chat, so each figure is shown only once.
    shown_image_paths = set()
    for msg in stream_to_gradio(agent, task_prompt, data_file=data_file):
        messages.append(msg)
        # After each agent message, pick up any figure saved since the last check.
        for image_path in get_images_in_directory("./figures"):
            if image_path in shown_image_paths:
                continue
            shown_image_paths.add(image_path)
            # Label the file with its real type; fall back to PNG if unknown.
            mime_type = mimetypes.guess_type(image_path)[0] or "image/png"
            messages.append(
                gr.ChatMessage(
                    role="assistant",
                    content=FileData(path=image_path, mime_type=mime_type),
                )
            )
        yield messages + [
            gr.ChatMessage(role="assistant", content="⏳ _Still processing..._")
        ]
    yield messages
80
+
81
+
82
# Gradio UI: a file upload, an optional notes box, a run button, and a chatbot
# panel that receives the message lists yielded by interact_with_agent.
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue=gr.themes.colors.blue,
        secondary_hue=gr.themes.colors.gray,
    )
) as demo:
    # The intro previously ended with an unbalanced "**", which rendered as
    # literal asterisks; the stray marker is removed.
    gr.Markdown(
        "# Mistral-Nemo Data analyst 📊🤔\n"
        "Drop a `.csv` file below, add notes to describe this data if needed, "
        "and Mistral-Nemo will analyze the file content and draw figures for you!"
    )
    file_input = gr.File(label="Your file to analyze")
    text_input = gr.Textbox(
        label="Additional notes to support the analysis"
    )
    submit = gr.Button("Run analysis!", variant="primary")
    chatbot = gr.Chatbot(
        label="Data Analyst Agent",
        type="messages",
        avatar_images=(
            None,
            "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png",
        ),
    )

    # Clicking the button passes the file and notes to the agent callback and
    # sends its output to the chatbot.
    submit.click(interact_with_agent, [file_input, text_input], [chatbot])

if __name__ == "__main__":
    demo.launch()