Spaces:
Sleeping
Sleeping
Ari
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -27,15 +27,16 @@ csv_file = st.file_uploader("Upload your CSV file", type=["csv"])
|
|
27 |
if csv_file is None:
|
28 |
data = pd.read_csv("default_data.csv") # Ensure this file exists in your working directory
|
29 |
st.write("Using default_data.csv file.")
|
|
|
30 |
else:
|
31 |
data = pd.read_csv(csv_file)
|
|
|
32 |
st.write(f"Data Preview ({csv_file.name}):")
|
33 |
st.dataframe(data.head())
|
34 |
|
35 |
# Step 2: Load CSV data into a persistent SQLite database
|
36 |
db_file = 'my_database.db'
|
37 |
conn = sqlite3.connect(db_file)
|
38 |
-
table_name = csv_file.name.split('.')[0] if csv_file else "default_table"
|
39 |
data.to_sql(table_name, conn, index=False, if_exists='replace')
|
40 |
|
41 |
# SQL table metadata (for validation and schema)
|
@@ -43,7 +44,7 @@ valid_columns = list(data.columns)
|
|
43 |
st.write(f"Valid columns: {valid_columns}")
|
44 |
|
45 |
# Step 3: Set up the LLM Chain to generate SQL queries
|
46 |
-
|
47 |
You are an expert data scientist. Given a natural language question, the name of the table, and a list of valid columns, generate a valid SQL query that answers the question.
|
48 |
|
49 |
Ensure that:
|
@@ -61,9 +62,9 @@ Valid columns: {columns}
|
|
61 |
|
62 |
SQL Query:
|
63 |
"""
|
64 |
-
|
65 |
llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
|
66 |
-
sql_generation_chain = LLMChain(llm=llm, prompt=
|
67 |
|
68 |
# Optional: Clean up function to remove incorrect COLLATE NOCASE usage
|
69 |
def clean_sql_query(query):
|
@@ -107,17 +108,34 @@ def process_input():
|
|
107 |
'columns': columns
|
108 |
})
|
109 |
|
110 |
-
# Debug: Display generated SQL query for inspection
|
111 |
-
st.write(f"Generated SQL Query:\n{generated_sql}")
|
112 |
-
|
113 |
# Clean the SQL query
|
114 |
generated_sql = clean_sql_query(generated_sql)
|
115 |
|
116 |
# Attempt to execute SQL query and handle exceptions
|
117 |
try:
|
118 |
result = pd.read_sql_query(generated_sql, conn)
|
119 |
-
|
120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
st.session_state.history.append({"role": "assistant", "content": result})
|
122 |
except Exception as e:
|
123 |
logging.error(f"An error occurred during SQL execution: {e}")
|
|
|
27 |
if csv_file is None:
|
28 |
data = pd.read_csv("default_data.csv") # Ensure this file exists in your working directory
|
29 |
st.write("Using default_data.csv file.")
|
30 |
+
table_name = "default_table"
|
31 |
else:
|
32 |
data = pd.read_csv(csv_file)
|
33 |
+
table_name = csv_file.name.split('.')[0]
|
34 |
st.write(f"Data Preview ({csv_file.name}):")
|
35 |
st.dataframe(data.head())
|
36 |
|
37 |
# Step 2: Load CSV data into a persistent SQLite database
|
38 |
db_file = 'my_database.db'
|
39 |
conn = sqlite3.connect(db_file)
|
|
|
40 |
data.to_sql(table_name, conn, index=False, if_exists='replace')
|
41 |
|
42 |
# SQL table metadata (for validation and schema)
|
|
|
44 |
st.write(f"Valid columns: {valid_columns}")
|
45 |
|
46 |
# Step 3: Set up the LLM Chain to generate SQL queries
|
47 |
+
sql_template = """
|
48 |
You are an expert data scientist. Given a natural language question, the name of the table, and a list of valid columns, generate a valid SQL query that answers the question.
|
49 |
|
50 |
Ensure that:
|
|
|
62 |
|
63 |
SQL Query:
|
64 |
"""
|
65 |
+
sql_prompt = PromptTemplate(template=sql_template, input_variables=['question', 'table_name', 'columns'])
|
66 |
llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
|
67 |
+
sql_generation_chain = LLMChain(llm=llm, prompt=sql_prompt)
|
68 |
|
69 |
# Optional: Clean up function to remove incorrect COLLATE NOCASE usage
|
70 |
def clean_sql_query(query):
|
|
|
108 |
'columns': columns
|
109 |
})
|
110 |
|
|
|
|
|
|
|
111 |
# Clean the SQL query
|
112 |
generated_sql = clean_sql_query(generated_sql)
|
113 |
|
114 |
# Attempt to execute SQL query and handle exceptions
|
115 |
try:
|
116 |
result = pd.read_sql_query(generated_sql, conn)
|
117 |
+
|
118 |
+
# Limit the result to first 5 rows for brevity
|
119 |
+
result_limited = result.head(5)
|
120 |
+
result_str = result_limited.to_string(index=False)
|
121 |
+
|
122 |
+
# Generate natural language answer
|
123 |
+
answer_template = """
|
124 |
+
Given the user's question and the SQL query result, provide a concise and informative answer to the question using the data from the query result.
|
125 |
+
|
126 |
+
User's question: {question}
|
127 |
+
Query result:
|
128 |
+
{result}
|
129 |
+
|
130 |
+
Answer:
|
131 |
+
"""
|
132 |
+
answer_prompt = PromptTemplate(template=answer_template, input_variables=['question', 'result'])
|
133 |
+
answer_chain = LLMChain(llm=llm, prompt=answer_prompt)
|
134 |
+
assistant_answer = answer_chain.run({'question': user_prompt, 'result': result_str})
|
135 |
+
|
136 |
+
# Append the assistant's answer to the history
|
137 |
+
st.session_state.history.append({"role": "assistant", "content": assistant_answer})
|
138 |
+
# Append the result DataFrame to the history
|
139 |
st.session_state.history.append({"role": "assistant", "content": result})
|
140 |
except Exception as e:
|
141 |
logging.error(f"An error occurred during SQL execution: {e}")
|