izammohammed commited on
Commit
0081066
·
1 Parent(s): fdfc017

uploaded files

Browse files
Files changed (5) hide show
  1. app.py +121 -0
  2. credentials.json +0 -0
  3. prompt.txt +26 -0
  4. requirements.txt +16 -0
  5. utils.py +11 -0
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ from utils import save_json, load_json
5
+ from markdown import markdown
6
+ from utils import load_json
7
+ from autoviz import AutoViz_Class
8
+ import base64
9
+ from google.cloud import aiplatform
10
+ import base64
11
+ import vertexai
12
+ from vertexai.preview.generative_models import GenerativeModel, Part
13
+
14
# setup cloud
# Point Application Default Credentials at the service-account key *before*
# the SDK is initialised, so any credential lookup triggered by init (or by
# the first API call) already sees the key file.
# NOTE(review): the project id and key path are hard-coded and the key file is
# committed next to the app — consider moving both to deployment secrets.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"

aiplatform.init(
    project="ultra-heading-407815",
    location="us-central1",
)
21
+
22
+
23
# Uploaded dataset; stays None until the user provides a file.
dataframe = None
st.title("GemInsights")
file = st.file_uploader(
    "Pick a dataframe", type=["csv", "xlsx"], accept_multiple_files=False
)

if file is not None:
    _, extension = os.path.splitext(file.name)
    # Compare case-insensitively so that e.g. "DATA.CSV" is parsed as CSV
    # instead of falling through to the Excel reader.
    if extension.lower() == ".csv":
        dataframe = pd.read_csv(file)
    else:
        dataframe = pd.read_excel(file)
    # Show a preview and the shape so the user can confirm the upload.
    st.write(dataframe.head())
    st.write(f"updated a dataframe with shape {dataframe.shape}")
37
+
38
if file is not None:
    # Free-text description of the dataset; it is later appended to the
    # prompt sent to the model.
    text_input = st.text_input(
        "Enter something about the data 👇",
        placeholder="eg:- This is a sales dataframe",
        disabled=False,
        label_visibility="visible",
    )

    # Target column chooser; starts with nothing selected (index=None), so
    # `option` is None until the user picks a column.
    column_choices = tuple(dataframe.columns)
    option = st.selectbox(
        "Which is the target column?",
        column_choices,
        index=None,
        placeholder="Select one column in here",
    )
52
+
53
def plot(dataframe, target):
    """Run AutoViz on ``dataframe`` with ``target`` as the dependent variable.

    Charts are requested as JPG files under the ``plots`` directory;
    ``prompt_make`` later reads them back from ``plots/<target>``.

    Args:
        dataframe: The dataset to visualise (passed to AutoViz as ``dfte``).
        target: Name of the target column (passed to AutoViz as ``depVar``).

    Returns:
        None. The frame returned by AutoViz is discarded.
    """
    autoviz_options = {
        "sep": ",",
        "depVar": target,
        "dfte": dataframe,
        "header": 0,
        # NOTE(review): per the AutoViz docs, verbose=2 saves charts to disk
        # instead of displaying them — confirm for the installed version.
        "verbose": 2,
        "lowess": False,
        "chart_format": "jpg",
        "max_rows_analyzed": 500,
        "max_cols_analyzed": 20,
        "save_plot_dir": "plots",
    }
    # The first positional argument is left empty because the data is
    # supplied in memory through `dfte`.
    AutoViz_Class().AutoViz("", **autoviz_options)
70
+
71
def prompt_make(dataframe, target, info):
    """Build the prompt text and the list of chart images for the model.

    Args:
        dataframe: Unused; kept so existing callers keep working.
        target: Name of the target column. Every file found in
            ``plots/<target>`` is attached as a JPEG image part.
        info: Free-text description of the dataset, appended to the prompt.

    Returns:
        A ``(prompt, images)`` tuple: the prompt string and a list with one
        ``Part`` per file in the image directory.

    Raises:
        FileNotFoundError: If ``plots/<target>`` or ``prompt.txt`` is missing.
    """
    image_dir = f"plots/{target}"

    images = []
    # os.listdir order is arbitrary; sort so the images are always sent to
    # the model in the same order.
    for image_file in sorted(os.listdir(image_dir)):
        image_path = os.path.join(image_dir, image_file)
        # Context manager closes the handle; the raw bytes are passed as-is
        # (a base64 encode followed by a decode just returns the same bytes).
        with open(image_path, "rb") as image_handle:
            img = image_handle.read()
        images.append(Part.from_data(img, mime_type="image/jpeg"))

    # Read the template as text: interpolating a bytes object into an
    # f-string would embed its repr (b'...' with escaped newlines).
    with open("prompt.txt", "r", encoding="utf-8") as prompt_file:
        data = prompt_file.read()
    prompt = f"{data}\n Here are some of the informations related to the dataset - '{info}'"

    return prompt, images
89
+
90
def generate_res(prompt, images):
    """Send the prompt and chart images to Gemini and return the reply text.

    Args:
        prompt: The instruction text; sent as the first content item.
        images: List of image parts appended after the prompt.

    Returns:
        The ``text`` attribute of the model response.
    """
    print("prompting ...")
    sampling = {
        "max_output_tokens": 2048,
        "temperature": 0.4,
        "top_p": 1,
        "top_k": 32,
    }
    contents = [prompt] + images
    vision_model = GenerativeModel("gemini-pro-vision")
    reply = vision_model.generate_content(contents, generation_config=sampling)
    return reply.text
103
+
104
+
105
+
106
def generate(dataframe, text_input, option):
    """Run the full pipeline: draw charts, build the prompt, query the model.

    Args:
        dataframe: The uploaded dataset.
        text_input: User-supplied description of the dataset.
        option: Name of the selected target column.

    Returns:
        The text returned by ``generate_res``.
    """
    # NOTE(review): nothing here clears plots/<option>, so files left by an
    # earlier run with the same target name may also be sent — confirm.
    plot(dataframe, option)
    prompt_text, chart_parts = prompt_make(dataframe, option, text_input)
    return generate_res(prompt_text, chart_parts)
111
+
112
# The inputs (`dataframe`, `text_input`, `option`) only exist once a file has
# been uploaded, so the button is offered only in that case.
if file is not None:
    if st.button("Get Insights", type="primary"):
        if option is None:
            # The selectbox starts with no selection (index=None); running
            # the pipeline without a target column cannot work.
            st.warning("Please select the target column first.")
        else:
            st.write("generating insights ...")
            # running the pipeline
            response = generate(dataframe, text_input, option)
            # Render the model's markdown directly. Converting it to HTML and
            # passing unsafe_allow_html=True would let any raw HTML in the
            # model output reach the page.
            st.markdown(response)
else:
    st.write("")
credentials.json ADDED
File without changes
prompt.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Act as an intelligent data analyst who communicates with clients in simple English and with clear messages.
2
+
3
+ We are building an end-to-end application that visualizes datasets, and we aim to extract valuable insights from these visualizations using an LLM. The insights generated should be beneficial to both companies and end-users. It's crucial that the model refrains from explicitly mentioning the images and provides information in a clear, detailed, and actionable manner.
4
+ Here are the visualizations of the data. Give the insights by considering the following points.
5
+
6
+ Here are important notes for output generation:
7
+ - Analyze the visual elements within the dataset using the visualizations.
8
+ - Identify and describe any prominent trends, patterns, or anomalies observed in the visual representations.
9
+ - Derive insights that are specifically relevant to the industry or domain associated with the dataset.
10
+ - Emphasize actionable information that could be of value to companies operating in that industry.
11
+ - Explore the possibility of making predictions based on the visual content.
12
+ - Formulate insights that would be valuable from an end-user perspective.
13
+ - Consider how the extracted information can enhance user experience, decision-making, or engagement.
14
+ - Do not mention the images directly in your responses. Focus on conveying insights without explicitly stating the visual content.
15
+ - Ensure that the insights are presented in a language suitable for technical and non-technical audiences. I encourage you to give clear, detailed explanations.
16
+ - Prioritize insights that are actionable and can contribute to informed decision-making for both businesses and end-users.
17
+ - If there are any recognized design patterns or industry standards applicable to the analysis, please incorporate and explain them.
18
+
19
+ Note to Model:
20
+ - Do not explicitly reference the images in your responses.
21
+ - Focus on providing clear, detailed, and actionable insights.
22
+ - Ensure that the insights are presented in a language suitable for technical and non-technical audiences.
23
+
24
+ Remember to adapt the prompt based on the specific details of your dataset and the objectives of your application.
25
+ Give the most important actionable insights rather than listing everything. Present them point by point. Don't mention the visualizations or plots in the output.
26
+ Also, don't use too much statistical jargon.
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ google-generativeai
2
+ pandas
3
+ numpy
4
+ matplotlib
5
+ seaborn
6
+ python-box
7
+ pexpect
8
+ streamlit
9
+ dataframe_image
10
+ jinja2
11
+ PyYAML
12
+ autoviz
13
+ ipython
14
+ google-cloud-aiplatform
15
+ markdown
16
+ llama-index
utils.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from box import ConfigBox
3
+
4
def load_json(file):
    """Load a JSON file and return its content wrapped in a ``ConfigBox``.

    Args:
        file: Path of the JSON file to read.

    Returns:
        ``ConfigBox`` built from the parsed JSON content.

    Raises:
        FileNotFoundError: If ``file`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Open the path that was passed in; the previous body referenced an
    # undefined name and raised NameError on every call.
    with open(file, encoding="utf-8") as f:
        content = json.load(f)
    return ConfigBox(content)
8
+
9
def save_json(file, content):
    """Write ``content`` to ``file`` as JSON indented by four spaces.

    Args:
        file: Destination path; an existing file is overwritten.
        content: JSON-serialisable object to store.

    Raises:
        TypeError: If ``content`` is not JSON-serialisable.
    """
    # Use the actual parameters; the previous body referenced undefined
    # names and raised NameError on every call.
    with open(file, "w", encoding="utf-8") as f:
        json.dump(content, f, indent=4)