DrishtiSharma committed on
Commit
9f3c9dc
·
verified ·
1 Parent(s): e4ab33c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -46
app.py CHANGED
@@ -136,20 +136,17 @@ def ask_gpt4o_for_visualization(query, df, llm, retries=2):
136
  numeric_columns = df.select_dtypes(include='number').columns.tolist()
137
  categorical_columns = df.select_dtypes(exclude='number').columns.tolist()
138
 
139
- # Enhanced Prompt with Diverse, Query-Based Examples
140
  prompt = f"""
141
  Analyze the following query and suggest the most suitable visualization(s) using the dataset.
142
-
143
  **Query:** "{query}"
144
-
145
  **Dataset Overview:**
146
  - **Numeric Columns (for Y-axis):** {', '.join(numeric_columns) if numeric_columns else 'None'}
147
  - **Categorical Columns (for X-axis or grouping):** {', '.join(categorical_columns) if categorical_columns else 'None'}
148
-
149
  Suggest visualizations in this exact JSON format:
150
  [
151
  {{
152
- "chart_type": "bar/box/line/scatter/pie/heatmap",
153
  "x_axis": "categorical_or_time_column",
154
  "y_axis": "numeric_column",
155
  "group_by": "optional_column_for_grouping",
@@ -157,9 +154,7 @@ def ask_gpt4o_for_visualization(query, df, llm, retries=2):
157
  "description": "Why this chart is suitable"
158
  }}
159
  ]
160
-
161
  **Query-Based Examples:**
162
-
163
  - **Query:** "What is the salary distribution across different job titles?"
164
  **Suggested Visualization:**
165
  {{
@@ -170,84 +165,74 @@ def ask_gpt4o_for_visualization(query, df, llm, retries=2):
170
  "title": "Salary Distribution by Job Title and Experience",
171
  "description": "A box plot to show how salaries vary across different job titles and experience levels."
172
  }}
173
-
174
- - **Query:** "Show the average salary by company size and industry."
175
  **Suggested Visualizations:**
176
  [
177
  {{
178
  "chart_type": "bar",
179
  "x_axis": "company_size",
180
  "y_axis": "salary_in_usd",
181
- "group_by": "industry",
182
- "title": "Average Salary by Company Size and Industry",
183
- "description": "A grouped bar chart comparing average salaries across company sizes and industries."
184
  }},
185
  {{
186
  "chart_type": "heatmap",
187
- "x_axis": "industry",
188
- "y_axis": "company_size",
189
- "group_by": null,
190
- "title": "Salary Heatmap by Industry and Company Size",
191
- "description": "A heatmap showing salary concentration across industries and company sizes."
192
  }}
193
  ]
194
-
195
- - **Query:** "How has the company's revenue changed over the years?"
196
  **Suggested Visualization:**
197
  {{
198
  "chart_type": "line",
199
- "x_axis": "year",
200
- "y_axis": "revenue",
201
- "group_by": null,
202
- "title": "Yearly Revenue Growth",
203
- "description": "A line chart showing revenue growth over time."
204
  }}
205
-
206
- - **Query:** "What is the market share of each product category?"
207
  **Suggested Visualization:**
208
  {{
209
  "chart_type": "pie",
210
- "x_axis": "product_category",
211
  "y_axis": null,
212
  "group_by": null,
213
- "title": "Market Share by Product Category",
214
- "description": "A pie chart to show the market share distribution across different product categories."
215
  }}
216
-
217
- - **Query:** "Is there a correlation between years of experience and salary?"
218
  **Suggested Visualization:**
219
  {{
220
  "chart_type": "scatter",
221
- "x_axis": "years_of_experience",
222
  "y_axis": "salary_in_usd",
223
- "group_by": "job_title",
224
- "title": "Experience vs Salary by Job Title",
225
- "description": "A scatter plot to analyze the relationship between experience and salary across different job titles."
226
  }}
227
-
228
- - **Query:** "Which departments have the highest concentration of employees across regions?"
229
  **Suggested Visualization:**
230
  {{
231
  "chart_type": "heatmap",
232
- "x_axis": "department",
233
- "y_axis": "region",
234
  "group_by": null,
235
- "title": "Employee Distribution by Department and Region",
236
- "description": "A heatmap to visualize employee density across departments and regions."
237
  }}
238
-
239
  Only suggest visualizations that logically match the query and dataset.
240
  """
241
 
242
  for attempt in range(retries + 1):
243
  try:
244
- # Generate response from the model
245
  response = llm.generate(prompt)
246
-
247
- # Load JSON response
248
  suggestions = json.loads(response)
249
 
250
- # Validate response structure using the helper function
251
  if isinstance(suggestions, list):
252
  valid_suggestions = [s for s in suggestions if is_valid_suggestion(s)]
253
  if valid_suggestions:
 
136
  numeric_columns = df.select_dtypes(include='number').columns.tolist()
137
  categorical_columns = df.select_dtypes(exclude='number').columns.tolist()
138
 
139
+ # Prompt with Dataset-Specific, Query-Based Examples
140
  prompt = f"""
141
  Analyze the following query and suggest the most suitable visualization(s) using the dataset.
 
142
  **Query:** "{query}"
 
143
  **Dataset Overview:**
144
  - **Numeric Columns (for Y-axis):** {', '.join(numeric_columns) if numeric_columns else 'None'}
145
  - **Categorical Columns (for X-axis or grouping):** {', '.join(categorical_columns) if categorical_columns else 'None'}
 
146
  Suggest visualizations in this exact JSON format:
147
  [
148
  {{
149
+ "chart_type": "bar/box/line/scatter/pie/heatmap",
150
  "x_axis": "categorical_or_time_column",
151
  "y_axis": "numeric_column",
152
  "group_by": "optional_column_for_grouping",
 
154
  "description": "Why this chart is suitable"
155
  }}
156
  ]
 
157
  **Query-Based Examples:**
 
158
  - **Query:** "What is the salary distribution across different job titles?"
159
  **Suggested Visualization:**
160
  {{
 
165
  "title": "Salary Distribution by Job Title and Experience",
166
  "description": "A box plot to show how salaries vary across different job titles and experience levels."
167
  }}
168
+ - **Query:** "Show the average salary by company size and employment type."
 
169
  **Suggested Visualizations:**
170
  [
171
  {{
172
  "chart_type": "bar",
173
  "x_axis": "company_size",
174
  "y_axis": "salary_in_usd",
175
+ "group_by": "employment_type",
176
+ "title": "Average Salary by Company Size and Employment Type",
177
+ "description": "A grouped bar chart comparing average salaries across company sizes and employment types."
178
  }},
179
  {{
180
  "chart_type": "heatmap",
181
+ "x_axis": "company_size",
182
+ "y_axis": "salary_in_usd",
183
+ "group_by": "employment_type",
184
+ "title": "Salary Heatmap by Company Size and Employment Type",
185
+ "description": "A heatmap showing salary concentration across company sizes and employment types."
186
  }}
187
  ]
188
+ - **Query:** "How has the average salary changed over the years?"
 
189
  **Suggested Visualization:**
190
  {{
191
  "chart_type": "line",
192
+ "x_axis": "work_year",
193
+ "y_axis": "salary_in_usd",
194
+ "group_by": "experience_level",
195
+ "title": "Average Salary Trend Over Years",
196
+ "description": "A line chart showing how the average salary has changed across different experience levels over the years."
197
  }}
198
+ - **Query:** "What is the employee distribution by company location?"
 
199
  **Suggested Visualization:**
200
  {{
201
  "chart_type": "pie",
202
+ "x_axis": "company_location",
203
  "y_axis": null,
204
  "group_by": null,
205
+ "title": "Employee Distribution by Company Location",
206
+ "description": "A pie chart showing the distribution of employees across company locations."
207
  }}
208
+ - **Query:** "Is there a relationship between remote work ratio and salary?"
 
209
  **Suggested Visualization:**
210
  {{
211
  "chart_type": "scatter",
212
+ "x_axis": "remote_ratio",
213
  "y_axis": "salary_in_usd",
214
+ "group_by": "experience_level",
215
+ "title": "Remote Work Ratio vs Salary",
216
+ "description": "A scatter plot to analyze the relationship between remote work ratio and salary."
217
  }}
218
+ - **Query:** "Which job titles have the highest salaries across regions?"
 
219
  **Suggested Visualization:**
220
  {{
221
  "chart_type": "heatmap",
222
+ "x_axis": "job_title",
223
+ "y_axis": "employee_residence",
224
  "group_by": null,
225
+ "title": "Salary Heatmap by Job Title and Region",
226
+ "description": "A heatmap showing the concentration of high-paying job titles across regions."
227
  }}
 
228
  Only suggest visualizations that logically match the query and dataset.
229
  """
230
 
231
  for attempt in range(retries + 1):
232
  try:
 
233
  response = llm.generate(prompt)
 
 
234
  suggestions = json.loads(response)
235
 
 
236
  if isinstance(suggestions, list):
237
  valid_suggestions = [s for s in suggestions if is_valid_suggestion(s)]
238
  if valid_suggestions: