spedrox-sac committed on
Commit
6d802e9
·
verified ·
1 Parent(s): fecc952

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -0
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import re
import sqlite3

import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from langchain_core.output_parsers import StrOutputParser

# Model used for both query generation and query suggestions.
MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"

# Keywords a generated statement is allowed to start with.
SQL_KEYWORDS = ('SELECT', 'INSERT', 'UPDATE', 'DELETE', 'CREATE', 'DROP', 'ALTER')

# Opening fence (optionally tagged "sql", any case) or closing fence.
_CODE_FENCE_RE = re.compile(r'```(?:sql)?\n?|\n?```', re.IGNORECASE)

# First statement: from a keyword up to the first ';', or to the end of the
# text when the model omitted the terminating semicolon.
_FIRST_STATEMENT_RE = re.compile(
    r'\b(?:' + '|'.join(SQL_KEYWORDS) + r')\b[\s\S]*?(?:;|\Z)',
    re.IGNORECASE,
)

# Leading list decoration such as "1. ", "2) ", "- " or "* ".
_LIST_MARKER_RE = re.compile(r'^(?:\d+[.)]|[-*])\s+')


def normalize_column_names(columns):
    """Return SQL-friendly, unique column names for *columns*.

    Runs of non-word characters are replaced by a single underscore. Names
    that collide after normalisation get a numeric suffix; the comparison is
    case-insensitive because SQLite treats column names that way.
    """
    taken = set()
    normalized = []
    for column in columns:
        base = re.sub(r'\W+', '_', str(column).strip())
        candidate = base
        suffix = 1
        while candidate.lower() in taken:
            candidate = f"{base}_{suffix}"
            suffix += 1
        taken.add(candidate.lower())
        normalized.append(candidate)
    return normalized


def strip_code_fences(text):
    """Remove Markdown code fences from *text* and trim whitespace."""
    return _CODE_FENCE_RE.sub('', text).strip()


def extract_first_statement(response):
    """Return the first SQL statement in *response*, or None if there is none.

    The result always ends with exactly one semicolon. Splitting is textual,
    so a ';' inside a string literal still terminates the statement.
    """
    match = _FIRST_STATEMENT_RE.search(strip_code_fences(response))
    if match is None:
        return None
    statement = match.group(0).strip()
    if not statement.endswith(';'):
        statement += ';'
    return statement


def split_suggestions(response):
    """Split a model reply into a list of ';'-terminated SQL statements.

    Fragments that do not start with one of SQL_KEYWORDS (after removing a
    leading list marker such as "1.") are discarded.
    """
    statements = []
    for fragment in strip_code_fences(response).split(';'):
        candidate = _LIST_MARKER_RE.sub('', fragment.strip())
        if candidate.upper().startswith(SQL_KEYWORDS):
            statements.append(candidate + ';')
    return statements


def _ask_model(api, system_message, user_message, max_tokens):
    """Send one system+user exchange to the chat model and return its text."""
    completion = api.chat.completions.create(
        model=MODEL_ID,
        max_tokens=max_tokens,
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ],
    )
    return completion.choices[0].message['content'] or ""


def _show_query_results(sql_query, conn):
    """Run *sql_query* against *conn* and render the resulting table."""
    result = pd.read_sql_query(sql_query, conn)
    st.write("Query Results:")
    st.dataframe(result)


def _render_query_section(api, columns, conn):
    """Render the natural-language input and run the generated SQL."""
    nl_query = st.text_area("Enter your query in natural language or in code:")
    if not st.button("Run Query/Code"):
        return
    if not nl_query.strip():
        # Avoid a pointless model call on empty input.
        st.warning("Please enter a query first.")
        return

    try:
        system_message = (
            "You are an AI assistant that converts natural language queries into SQL queries based on the following table schema.\n"
            "Table name: data\n"
            f"Columns: {', '.join(columns)}\n"
            "Provide only the SQL query suggestion in code blocks without any explanations, comments, or other text."
        )
        raw_response = _ask_model(api, system_message, nl_query, max_tokens=150).strip()

        sql_query = extract_first_statement(raw_response)
        if sql_query is None:
            st.error("Failed to extract a valid SQL query from the response.")
            st.write("**Raw LLM Response:**")
            st.write(raw_response)
            # Return instead of st.stop() so the rest of the page still renders.
            return

        st.markdown(f"**Generated SQL Query:** `{sql_query}`")
        _show_query_results(sql_query, conn)
    except Exception as e:
        st.error(f"Error: {e}")


def _render_suggestions_section(api, columns, conn):
    """Render the suggestion generator and the "execute selected" control."""
    # Suggestions generated for a previously uploaded CSV do not apply to a
    # different schema, so drop them when the column list changes.
    if st.session_state.get('suggestion_columns') != columns:
        st.session_state.pop('valid_suggestions', None)

    if st.button("Show Query Suggestions"):
        try:
            system_message = (
                "You are an AI assistant that provides SQL query suggestions based on the following table schema.\n"
                "Table name: data\n"
                f"Columns: {', '.join(columns)}\n"
                "Provide exactly 5 example SQL queries separated by semicolons without any explanations, comments, or code blocks."
            )
            raw_suggestions = _ask_model(
                api, system_message, "Provide SQL query suggestions.", max_tokens=300
            )
            valid_suggestions = split_suggestions(raw_suggestions)
            st.session_state['valid_suggestions'] = valid_suggestions
            st.session_state['suggestion_columns'] = columns

            if not valid_suggestions:
                st.error("No valid SQL query suggestions were generated.")
                st.write("**Raw Suggestions Response:**")
                st.write(raw_suggestions)
        except Exception as e:
            st.error(f"Error generating suggestions: {e}")

    # This part is deliberately NOT nested under the button above: a button
    # is only True on the rerun caused by its own click, so a nested
    # "Execute" button would never be reached. Session state carries the
    # suggestions across reruns instead.
    valid_suggestions = st.session_state.get('valid_suggestions')
    if not valid_suggestions:
        return

    st.write("**Query Suggestions:**")
    # Each entry already ends with ';', so join with a bare newline.
    st.code('\n'.join(valid_suggestions), language='sql')

    selected_query = st.selectbox("Select a query to execute:", valid_suggestions)
    if st.button("Execute Selected Query"):
        try:
            st.write(f"**Executing SQL Query:** `{selected_query}`")
            _show_query_results(selected_query, conn)
        except Exception as e:
            st.error(f"Error executing selected query: {e}")


def main():
    """Streamlit entry point: upload a CSV, then query it through an LLM."""
    load_dotenv()
    token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
    api = InferenceClient(token=token)

    st.title("AiSQL: AI-Powered SQL Query Generator")

    uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
    if not uploaded_file:
        return

    df = pd.read_csv(uploaded_file)
    st.write("Uploaded Data:")
    st.dataframe(df)

    # Normalize column names: replace spaces and special characters with
    # underscores and make the result unique.
    df.columns = normalize_column_names(df.columns)
    columns = df.columns.tolist()

    st.write("Normalized Columns in the CSV:")
    st.write(columns)

    # The in-memory database is rebuilt on every rerun; close it when done.
    conn = sqlite3.connect(':memory:')
    try:
        df.to_sql('data', conn, index=False, if_exists='replace')
        _render_query_section(api, columns, conn)
        _render_suggestions_section(api, columns, conn)
    finally:
        conn.close()


# Streamlit executes the script with __name__ set to "__main__".
if __name__ == "__main__":
    main()