Spaces:

DrishtiSharma
/

sql-rag

Running

App Files Files Community

DrishtiSharma committed on Jan 14

Commit

70acfe7

verified ·

1 Parent(s): 9f3c9dc

Update dummy_funcs.py

Browse files

Files changed (1) hide show

dummy_funcs.py +62 -50

dummy_funcs.py CHANGED Viewed

@@ -228,16 +228,17 @@ def ask_gpt4o_for_visualization(query, df, llm, retries=2):
     numeric_columns = df.select_dtypes(include='number').columns.tolist()
     categorical_columns = df.select_dtypes(exclude='number').columns.tolist()
-    # Enhanced Prompt with More Examples
     prompt = f"""
     Analyze the following query and suggest the most suitable visualization(s) using the dataset.
     **Query:** "{query}"
-    **Numeric Columns (for Y-axis):** {', '.join(numeric_columns) if numeric_columns else 'None'}
-    **Categorical Columns (for X-axis or grouping):** {', '.join(categorical_columns) if categorical_columns else 'None'}
-    Suggest visualizations in this exact JSON format:
     [
       {{
         "chart_type": "bar/box/line/scatter/pie/heatmap",
@@ -249,83 +250,96 @@ def ask_gpt4o_for_visualization(query, df, llm, retries=2):
       }}
     ]
-    **Examples:**
-    - For salary distribution:
       {{
         "chart_type": "box",
         "x_axis": "job_title",
         "y_axis": "salary_in_usd",
         "group_by": "experience_level",
         "title": "Salary Distribution by Job Title and Experience",
-        "description": "A box plot showing salary ranges across job titles and experience levels."
       }}
-    - For company size comparison:
-      {{
-        "chart_type": "bar",
-        "x_axis": "company_size",
-        "y_axis": "salary_in_usd",
-        "group_by": null,
-        "title": "Average Salary by Company Size",
-        "description": "A bar chart comparing the average salaries across different company sizes."
-      }}
-    - For revenue trends over time:
       {{
         "chart_type": "line",
-        "x_axis": "year",
-        "y_axis": "revenue",
-        "group_by": null,
-        "title": "Revenue Growth Over Years",
-        "description": "A line chart showing the trend of revenue over the years."
       }}
-    - For market share breakdown:
       {{
         "chart_type": "pie",
-        "x_axis": "market_segment",
         "y_axis": null,
         "group_by": null,
-        "title": "Market Share by Segment",
-        "description": "A pie chart showing the distribution of market share across various segments."
       }}
-    - For correlation analysis:
       {{
         "chart_type": "scatter",
-        "x_axis": "years_of_experience",
         "y_axis": "salary_in_usd",
-        "group_by": "job_title",
-        "title": "Experience vs Salary by Job Title",
-        "description": "A scatter plot showing the relationship between years of experience and salary across job titles."
       }}
-    - For data density:
       {{
         "chart_type": "heatmap",
-        "x_axis": "department",
-        "y_axis": "region",
         "group_by": null,
-        "title": "Employee Distribution by Department and Region",
-        "description": "A heatmap showing the concentration of employees across departments and regions."
       }}
-    Only suggest visualizations that make sense for the data and the query.
     """
     for attempt in range(retries + 1):
         try:
-            # Generate response from the model
             response = llm.generate(prompt)
-            # Load JSON response
             suggestions = json.loads(response)
-            # Validate response structure
             if isinstance(suggestions, list):
-                valid_suggestions = [
-                    s for s in suggestions if all(k in s for k in ["chart_type", "x_axis", "y_axis"])
-                ]
                 if valid_suggestions:
                     return valid_suggestions
                 else:
@@ -333,21 +347,19 @@ def ask_gpt4o_for_visualization(query, df, llm, retries=2):
                     return None
             elif isinstance(suggestions, dict):
-                if all(k in suggestions for k in ["chart_type", "x_axis", "y_axis"]):
                     return [suggestions]
                 else:
-                    st.warning("⚠️ GPT-4o's suggestion is incomplete.")
                     return None
         except json.JSONDecodeError:
             st.warning(f"⚠️ Attempt {attempt + 1}: GPT-4o returned invalid JSON.")
         except Exception as e:
             st.error(f"⚠️ Error during GPT-4o call: {e}")
-        # Retry if necessary
         if attempt < retries:
             st.info("🔄 Retrying visualization suggestion...")
     st.error("❌ Failed to generate a valid visualization after multiple attempts.")
     return None

     numeric_columns = df.select_dtypes(include='number').columns.tolist()
     categorical_columns = df.select_dtypes(exclude='number').columns.tolist()
+    # Enhanced Prompt with Dataset-Specific, Query-Based Examples
     prompt = f"""
     Analyze the following query and suggest the most suitable visualization(s) using the dataset.
     **Query:** "{query}"
+    **Dataset Overview:**
+    - **Numeric Columns (for Y-axis):** {', '.join(numeric_columns) if numeric_columns else 'None'}
+    - **Categorical Columns (for X-axis or grouping):** {', '.join(categorical_columns) if categorical_columns else 'None'}
+    **Expected JSON Response:**
     [
       {{
         "chart_type": "bar/box/line/scatter/pie/heatmap",
       }}
     ]
+    **Query-Based Examples:**
+    - **Query:** "What is the salary distribution across different job titles?"
+      **Suggested Visualization:**
       {{
         "chart_type": "box",
         "x_axis": "job_title",
         "y_axis": "salary_in_usd",
         "group_by": "experience_level",
         "title": "Salary Distribution by Job Title and Experience",
+        "description": "A box plot to show how salaries vary across different job titles and experience levels."
       }}
+    - **Query:** "Show the average salary by company size and employment type."
+      **Suggested Visualizations:**
+      [
+        {{
+          "chart_type": "bar",
+          "x_axis": "company_size",
+          "y_axis": "salary_in_usd",
+          "group_by": "employment_type",
+          "title": "Average Salary by Company Size and Employment Type",
+          "description": "A grouped bar chart comparing average salaries across company sizes and employment types."
+        }},
+        {{
+          "chart_type": "heatmap",
+          "x_axis": "company_size",
+          "y_axis": "salary_in_usd",
+          "group_by": "employment_type",
+          "title": "Salary Heatmap by Company Size and Employment Type",
+          "description": "A heatmap showing salary concentration across company sizes and employment types."
+        }}
+      ]
+    - **Query:** "How has the average salary changed over the years?"
+      **Suggested Visualization:**
       {{
         "chart_type": "line",
+        "x_axis": "work_year",
+        "y_axis": "salary_in_usd",
+        "group_by": "experience_level",
+        "title": "Average Salary Trend Over Years",
+        "description": "A line chart showing how the average salary has changed across different experience levels over the years."
       }}
+    - **Query:** "What is the employee distribution by company location?"
+      **Suggested Visualization:**
       {{
         "chart_type": "pie",
+        "x_axis": "company_location",
         "y_axis": null,
         "group_by": null,
+        "title": "Employee Distribution by Company Location",
+        "description": "A pie chart showing the distribution of employees across company locations."
       }}
+    - **Query:** "Is there a relationship between remote work ratio and salary?"
+      **Suggested Visualization:**
       {{
         "chart_type": "scatter",
+        "x_axis": "remote_ratio",
         "y_axis": "salary_in_usd",
+        "group_by": "experience_level",
+        "title": "Remote Work Ratio vs Salary",
+        "description": "A scatter plot to analyze the relationship between remote work ratio and salary."
       }}
+    - **Query:** "Which job titles have the highest salaries across regions?"
+      **Suggested Visualization:**
       {{
         "chart_type": "heatmap",
+        "x_axis": "job_title",
+        "y_axis": "employee_residence",
         "group_by": null,
+        "title": "Salary Heatmap by Job Title and Region",
+        "description": "A heatmap showing the concentration of high-paying job titles across regions."
       }}
+    Only suggest visualizations that logically match the query and dataset.
     """
+    # Attempt LLM Response with Retry
     for attempt in range(retries + 1):
         try:
             response = llm.generate(prompt)
             suggestions = json.loads(response)
+            # Validate suggestions using helper
             if isinstance(suggestions, list):
+                valid_suggestions = [s for s in suggestions if is_valid_suggestion(s)]
                 if valid_suggestions:
                     return valid_suggestions
                 else:
                     return None
             elif isinstance(suggestions, dict):
+                if is_valid_suggestion(suggestions):
                     return [suggestions]
                 else:
+                    st.warning("⚠️ GPT-4o's suggestion is incomplete or invalid.")
                     return None
         except json.JSONDecodeError:
             st.warning(f"⚠️ Attempt {attempt + 1}: GPT-4o returned invalid JSON.")
         except Exception as e:
             st.error(f"⚠️ Error during GPT-4o call: {e}")
         if attempt < retries:
             st.info("🔄 Retrying visualization suggestion...")
     st.error("❌ Failed to generate a valid visualization after multiple attempts.")
     return None