import streamlit as st from huggingface_hub import InferenceClient from langchain_core.output_parsers import StrOutputParser import os from dotenv import load_dotenv import pandas as pd import sqlite3 import re load_dotenv() token = os.getenv('HUGGINGFACEHUB_API_TOKEN') api = InferenceClient(token=token) parser = StrOutputParser() # Streamlit app st.title("AiSQL: AI-Powered SQL Query Generator") # File uploader for CSV uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"]) if uploaded_file: # Read CSV into DataFrame df = pd.read_csv(uploaded_file) st.write("Uploaded Data:") st.dataframe(df) # Normalize column names: replace spaces and special characters with underscores df.columns = [re.sub(r'\W+', '_', col.strip()) for col in df.columns] st.write("Normalized Columns in the CSV:") st.write(df.columns.tolist()) # Create SQLite in-memory database conn = sqlite3.connect(':memory:') df.to_sql('data', conn, index=False, if_exists='replace') # Natural language query input nl_query = st.text_area("Enter your query in natural language or in code:") if st.button("Run Query/Code"): try: # Generate SQL query using LLM system_message = ( "You are an AI assistant that converts natural language queries into SQL queries based on the following table schema.\n" f"Table name: data\n" f"Columns: {', '.join(df.columns.tolist())}\n" "Provide only the SQL query suggestion in code blocks without any explanations, comments, or other text." ) messages = [ {"role": "system", "content": system_message}, {"role": "user", "content": nl_query} ] llm = api.chat.completions.create( model="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=150, messages=messages ) raw_response = llm.choices[0].message['content'].strip() # Remove code blocks if present sql_query = re.sub(r'```sql\n?|\n?```', '', raw_response).strip() # Additional cleaning: Extract the first SQL statement match = re.search(r'\b(SELECT|INSERT|UPDATE|DELETE|CREATE|DROP|ALTER)\b[\s\S]*?;', sql_query, re.IGNORECASE) if match: sql_query = match.group(0) else: st.error("Failed to extract a valid SQL query from the response.") st.write("**Raw LLM Response:**") st.write(raw_response) st.stop() # Validate that the SQL query starts with a valid keyword valid_sql_keywords = ['SELECT', 'INSERT', 'UPDATE', 'DELETE', 'CREATE', 'DROP', 'ALTER'] if not any(sql_query.upper().startswith(keyword) for keyword in valid_sql_keywords): st.error("The generated SQL query does not start with a valid SQL command.") st.write("**Extracted SQL Query:**") st.write(sql_query) st.stop() st.markdown(f"**Generated SQL Query:** `{sql_query}`") # Execute SQL query result = pd.read_sql_query(sql_query, conn) st.write("Query Results:") st.dataframe(result) except Exception as e: st.error(f"Error: {e}") # Generate query suggestions using LLM if st.button("Show Query Suggestions"): try: system_message = ( "You are an AI assistant that provides SQL query suggestions based on the following table schema.\n" f"Table name: data\n" f"Columns: {', '.join(df.columns.tolist())}\n" "Provide exactly 5 example SQL queries separated by semicolons without any explanations, comments, or code blocks." ) suggestion_messages = [ {"role": "system", "content": system_message}, {"role": "user", "content": "Provide SQL query suggestions."} ] suggestions_llm = api.chat.completions.create( model="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=300, messages=suggestion_messages ) raw_suggestions = suggestions_llm.choices[0].message['content'] # Remove code blocks if present suggestions = re.sub(r'```sql\n?|\n?```', '', raw_suggestions).strip() # Split multiple queries separated by semicolons suggestions_list = [query.strip() for query in suggestions.split(';') if query.strip()] # Validate each suggestion starts with a valid SQL keyword valid_sql_keywords = ['SELECT', 'INSERT', 'UPDATE', 'DELETE', 'CREATE', 'DROP', 'ALTER'] valid_suggestions = [] for query in suggestions_list: if any(query.upper().startswith(keyword) for keyword in valid_sql_keywords): valid_suggestions.append(query + ';') st.session_state['valid_suggestions'] = valid_suggestions if valid_suggestions: formatted_suggestions = ';\n'.join(valid_suggestions) st.write("**Query Suggestions:**") st.code(formatted_suggestions, language='sql') # Optionally, allow users to select a suggestion to execute if 'valid_suggestions' in st.session_state: selected_query = st.selectbox("Select a query to execute:", st.session_state['valid_suggestions']) if st.button("Execute Selected Query"): # Execute the selected query try: st.write(f"**Executing SQL Query:** `{selected_query}`") result = pd.read_sql_query(selected_query, conn) st.write("Query Results:") st.dataframe(result) except Exception as e: st.error(f"Error executing selected query: {e}") else: st.error("No valid SQL query suggestions were generated.") st.write("**Raw Suggestions Response:**") st.write(suggestions) except Exception as e: st.error(f"Error generating suggestions: {e}")