Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -246,35 +246,38 @@ def add_stats_to_figure(fig, df, y_axis):
|
|
| 246 |
)
|
| 247 |
return fig
|
| 248 |
|
| 249 |
-
# Unified
|
| 250 |
def generate_visual_from_query(query, df, llm=None):
|
| 251 |
try:
|
|
|
|
| 252 |
matched_columns = fuzzy_match_columns(query)
|
| 253 |
|
| 254 |
-
# Fallback to LLM if
|
| 255 |
if not matched_columns and llm:
|
| 256 |
st.info("π€ No match found. Asking AI for suggestions...")
|
| 257 |
suggestion = ask_llm_for_columns(query, llm, df)
|
| 258 |
if suggestion:
|
| 259 |
matched_columns = [suggestion.get("x_axis"), suggestion.get("group_by")]
|
| 260 |
|
| 261 |
-
#
|
| 262 |
if len(matched_columns) >= 2:
|
| 263 |
x_axis, group_by = matched_columns[0], matched_columns[1]
|
| 264 |
elif len(matched_columns) == 1:
|
| 265 |
x_axis, group_by = matched_columns[0], None
|
| 266 |
else:
|
| 267 |
-
st.warning("β No matching columns found.
|
| 268 |
return None
|
| 269 |
|
| 270 |
-
#
|
|
|
|
|
|
|
| 271 |
if "distribution" in query:
|
| 272 |
fig = px.box(df, x=x_axis, y="salary_in_usd", color=group_by,
|
| 273 |
title=f"Salary Distribution by {x_axis.replace('_', ' ').title()}"
|
| 274 |
+ (f" and {group_by.replace('_', ' ').title()}" if group_by else ""))
|
| 275 |
return add_stats_to_figure(fig, df, "salary_in_usd")
|
| 276 |
|
| 277 |
-
#
|
| 278 |
elif "average" in query or "mean" in query:
|
| 279 |
grouped_df = df.groupby([x_axis] + ([group_by] if group_by else []))["salary_in_usd"].mean().reset_index()
|
| 280 |
fig = px.bar(grouped_df, x=x_axis, y="salary_in_usd", color=group_by,
|
|
@@ -282,22 +285,23 @@ def generate_visual_from_query(query, df, llm=None):
|
|
| 282 |
+ (f" and {group_by.replace('_', ' ').title()}" if group_by else ""))
|
| 283 |
return add_stats_to_figure(fig, df, "salary_in_usd")
|
| 284 |
|
| 285 |
-
#
|
| 286 |
elif "trend" in query and "work_year" in df.columns:
|
| 287 |
grouped_df = df.groupby(["work_year", x_axis])["salary_in_usd"].mean().reset_index()
|
| 288 |
fig = px.line(grouped_df, x="work_year", y="salary_in_usd", color=x_axis,
|
| 289 |
title=f"Salary Trend Over Years by {x_axis.replace('_', ' ').title()}")
|
| 290 |
return add_stats_to_figure(fig, df, "salary_in_usd")
|
| 291 |
|
| 292 |
-
#
|
| 293 |
elif "remote" in query:
|
| 294 |
grouped_df = df.groupby(["remote_ratio"] + ([group_by] if group_by else []))["salary_in_usd"].mean().reset_index()
|
| 295 |
fig = px.bar(grouped_df, x="remote_ratio", y="salary_in_usd", color=group_by,
|
| 296 |
title="Remote Work Impact on Salary")
|
| 297 |
return add_stats_to_figure(fig, df, "salary_in_usd")
|
| 298 |
|
|
|
|
| 299 |
else:
|
| 300 |
-
st.warning("β οΈ No suitable visualization
|
| 301 |
return None
|
| 302 |
|
| 303 |
except Exception as e:
|
|
@@ -305,7 +309,6 @@ def generate_visual_from_query(query, df, llm=None):
|
|
| 305 |
return None
|
| 306 |
|
| 307 |
|
| 308 |
-
|
| 309 |
# SQL-RAG Analysis
|
| 310 |
if st.session_state.df is not None:
|
| 311 |
temp_dir = tempfile.TemporaryDirectory()
|
|
|
|
| 246 |
)
|
| 247 |
return fig
|
| 248 |
|
| 249 |
+
# Unified Visualization Generator with Fuzzy Matching and LLM Fallback
|
| 250 |
def generate_visual_from_query(query, df, llm=None):
|
| 251 |
try:
|
| 252 |
+
# Step 1: Attempt Fuzzy Matching
|
| 253 |
matched_columns = fuzzy_match_columns(query)
|
| 254 |
|
| 255 |
+
# Step 2: Fallback to LLM if no columns are matched
|
| 256 |
if not matched_columns and llm:
|
| 257 |
st.info("π€ No match found. Asking AI for suggestions...")
|
| 258 |
suggestion = ask_llm_for_columns(query, llm, df)
|
| 259 |
if suggestion:
|
| 260 |
matched_columns = [suggestion.get("x_axis"), suggestion.get("group_by")]
|
| 261 |
|
| 262 |
+
# Step 3: Process Matched Columns
|
| 263 |
if len(matched_columns) >= 2:
|
| 264 |
x_axis, group_by = matched_columns[0], matched_columns[1]
|
| 265 |
elif len(matched_columns) == 1:
|
| 266 |
x_axis, group_by = matched_columns[0], None
|
| 267 |
else:
|
| 268 |
+
st.warning("β No matching columns found. Try rephrasing your query.")
|
| 269 |
return None
|
| 270 |
|
| 271 |
+
# Step 4: Visualization Generation
|
| 272 |
+
|
| 273 |
+
# Distribution Plot
|
| 274 |
if "distribution" in query:
|
| 275 |
fig = px.box(df, x=x_axis, y="salary_in_usd", color=group_by,
|
| 276 |
title=f"Salary Distribution by {x_axis.replace('_', ' ').title()}"
|
| 277 |
+ (f" and {group_by.replace('_', ' ').title()}" if group_by else ""))
|
| 278 |
return add_stats_to_figure(fig, df, "salary_in_usd")
|
| 279 |
|
| 280 |
+
# Average Salary Plot
|
| 281 |
elif "average" in query or "mean" in query:
|
| 282 |
grouped_df = df.groupby([x_axis] + ([group_by] if group_by else []))["salary_in_usd"].mean().reset_index()
|
| 283 |
fig = px.bar(grouped_df, x=x_axis, y="salary_in_usd", color=group_by,
|
|
|
|
| 285 |
+ (f" and {group_by.replace('_', ' ').title()}" if group_by else ""))
|
| 286 |
return add_stats_to_figure(fig, df, "salary_in_usd")
|
| 287 |
|
| 288 |
+
# Salary Trends Over Time
|
| 289 |
elif "trend" in query and "work_year" in df.columns:
|
| 290 |
grouped_df = df.groupby(["work_year", x_axis])["salary_in_usd"].mean().reset_index()
|
| 291 |
fig = px.line(grouped_df, x="work_year", y="salary_in_usd", color=x_axis,
|
| 292 |
title=f"Salary Trend Over Years by {x_axis.replace('_', ' ').title()}")
|
| 293 |
return add_stats_to_figure(fig, df, "salary_in_usd")
|
| 294 |
|
| 295 |
+
# Remote Work Impact
|
| 296 |
elif "remote" in query:
|
| 297 |
grouped_df = df.groupby(["remote_ratio"] + ([group_by] if group_by else []))["salary_in_usd"].mean().reset_index()
|
| 298 |
fig = px.bar(grouped_df, x="remote_ratio", y="salary_in_usd", color=group_by,
|
| 299 |
title="Remote Work Impact on Salary")
|
| 300 |
return add_stats_to_figure(fig, df, "salary_in_usd")
|
| 301 |
|
| 302 |
+
# No Specific Match
|
| 303 |
else:
|
| 304 |
+
st.warning("β οΈ No suitable visualization to display!")
|
| 305 |
return None
|
| 306 |
|
| 307 |
except Exception as e:
|
|
|
|
| 309 |
return None
|
| 310 |
|
| 311 |
|
|
|
|
| 312 |
# SQL-RAG Analysis
|
| 313 |
if st.session_state.df is not None:
|
| 314 |
temp_dir = tempfile.TemporaryDirectory()
|