Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,9 @@ from dotenv import load_dotenv
|
|
12 |
load_dotenv()
|
13 |
userdata = os.environ
|
14 |
|
|
|
|
|
|
|
15 |
|
16 |
def chat_with_groq(client:groq.Groq,
|
17 |
prompt:str,
|
@@ -46,29 +49,19 @@ def chat_with_groq(client:groq.Groq,
|
|
46 |
# logger.info(f"Completion: {completion}")
|
47 |
return completion.choices[0].message.content
|
48 |
|
49 |
-
def execute_duckdb_query(query:str)->pd.DataFrame:
|
50 |
-
"""
|
51 |
-
Execute a DuckDB query and return the result as a pandas DataFrame.
|
52 |
-
|
53 |
-
Args:
|
54 |
-
query (str): The DuckDB query to execute.
|
55 |
-
|
56 |
-
Returns:
|
57 |
-
pd.DataFrame: The result of the query as a pandas DataFrame.
|
58 |
-
"""
|
59 |
-
original_cwd = os.getcwd()
|
60 |
-
print(f"PATH:{original_cwd}")
|
61 |
-
os.chdir('data')
|
62 |
-
print(f"PATH:{os.getcwd()}")
|
63 |
-
|
64 |
try:
|
65 |
conn = duckdb.connect(database=":memory:", read_only=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
query_result = conn.execute(query).fetch_df().reset_index()
|
67 |
-
os.chdir(original_cwd)
|
68 |
return query_result
|
69 |
except Exception as e:
|
70 |
-
print(f"Error: {e}")
|
71 |
-
os.chdir(original_cwd)
|
72 |
raise e
|
73 |
def get_summarization(client:groq.Groq,
|
74 |
use_question:str,
|
@@ -258,10 +251,6 @@ base_prompt = """
|
|
258 |
* Ensure that the entire output is returned on only one single line
|
259 |
* Keep your query as simple and straightforward as possible; do not use subqueries
|
260 |
"""
|
261 |
-
table_description = """"""
|
262 |
-
tables_string = """"""
|
263 |
-
table_1 = """"""
|
264 |
-
table_1_wt_xt = """"""
|
265 |
user_question = """"""
|
266 |
|
267 |
# And some rules for querying the dataset:
|
@@ -272,38 +261,37 @@ user_question = """"""
|
|
272 |
|
273 |
|
274 |
def upload_file(files) -> List[str]:
|
275 |
-
# will have to change to the private system is initiializes
|
276 |
model = "llama3-8b-8192"
|
277 |
-
api_key:str=userdata.get("GROQ_API_KEY")
|
278 |
-
|
279 |
-
|
280 |
-
if type(files) == str:
|
281 |
files = [files]
|
|
|
282 |
stored_paths = []
|
283 |
stored_table_descriptions = []
|
284 |
tables = []
|
|
|
285 |
for file in files:
|
286 |
filename = Path(file.name).name
|
287 |
-
path =
|
288 |
|
289 |
# Copy the content of the temporary file to our destination
|
290 |
-
|
291 |
-
destination.write(source.read())
|
292 |
|
293 |
-
stored_paths.append(str(path
|
294 |
-
table_description = identify_column_datatypes_to_SQL_DEF(pd.read_csv(path),api_key,model)
|
295 |
-
desc = "Table:
|
296 |
stored_table_descriptions.append(desc)
|
297 |
tables.append(filename)
|
298 |
-
|
|
|
|
|
|
|
299 |
tables_string = join_with_and(tables)
|
300 |
-
|
301 |
-
table_1_wt_xt =
|
302 |
-
|
303 |
-
|
304 |
-
table_1 = tables[0]
|
305 |
-
table_1_wt_xt = table_1_wt_xt
|
306 |
-
return final
|
307 |
|
308 |
def user_prompt_sanitization(user_prompt:str)->str:
|
309 |
guide = """
|
@@ -388,6 +376,7 @@ with gr.Blocks() as demo:
|
|
388 |
upload_output = gr.Textbox(label="Upload Status", lines=5)
|
389 |
|
390 |
upload_button.click(upload_file, inputs=file_output, outputs=upload_output)
|
|
|
391 |
with gr.Tab("Query Interface"):
|
392 |
chatbot = gr.Chatbot()
|
393 |
with gr.Row():
|
@@ -395,8 +384,6 @@ with gr.Blocks() as demo:
|
|
395 |
submit_button = gr.Button("Submit")
|
396 |
submit_button.click(queryModel, inputs=[user_input], outputs=chatbot)
|
397 |
|
398 |
-
|
399 |
-
|
400 |
-
demo.launch(share=True)
|
401 |
|
402 |
|
|
|
12 |
load_dotenv()
|
13 |
userdata = os.environ
|
14 |
|
15 |
+
DATA_DIR = Path(os.getcwd()) / "data"
|
16 |
+
DATA_DIR.mkdir(parents=True, exist_ok=True)
|
17 |
+
|
18 |
|
19 |
def chat_with_groq(client:groq.Groq,
|
20 |
prompt:str,
|
|
|
49 |
# logger.info(f"Completion: {completion}")
|
50 |
return completion.choices[0].message.content
|
51 |
|
52 |
+
def execute_duckdb_query(query: str) -> pd.DataFrame:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
try:
|
54 |
conn = duckdb.connect(database=":memory:", read_only=False)
|
55 |
+
|
56 |
+
# Load all CSV files from the data directory
|
57 |
+
for csv_file in DATA_DIR.glob("*.csv"):
|
58 |
+
table_name = csv_file.stem
|
59 |
+
conn.execute(f"CREATE TABLE {table_name} AS SELECT * FROM read_csv_auto('{csv_file}')")
|
60 |
+
|
61 |
query_result = conn.execute(query).fetch_df().reset_index()
|
|
|
62 |
return query_result
|
63 |
except Exception as e:
|
64 |
+
print(f"Error executing query: {e}")
|
|
|
65 |
raise e
|
66 |
def get_summarization(client:groq.Groq,
|
67 |
use_question:str,
|
|
|
251 |
* Ensure that the entire output is returned on only one single line
|
252 |
* Keep your query as simple and straightforward as possible; do not use subqueries
|
253 |
"""
|
|
|
|
|
|
|
|
|
254 |
user_question = """"""
|
255 |
|
256 |
# And some rules for querying the dataset:
|
|
|
261 |
|
262 |
|
263 |
def upload_file(files) -> List[str]:
|
|
|
264 |
model = "llama3-8b-8192"
|
265 |
+
api_key: str = userdata.get("GROQ_API_KEY")
|
266 |
+
|
267 |
+
if isinstance(files, str):
|
|
|
268 |
files = [files]
|
269 |
+
|
270 |
stored_paths = []
|
271 |
stored_table_descriptions = []
|
272 |
tables = []
|
273 |
+
|
274 |
for file in files:
|
275 |
filename = Path(file.name).name
|
276 |
+
path = DATA_DIR / filename
|
277 |
|
278 |
# Copy the content of the temporary file to our destination
|
279 |
+
shutil.copy2(file.name, path)
|
|
|
280 |
|
281 |
+
stored_paths.append(str(path))
|
282 |
+
table_description = identify_column_datatypes_to_SQL_DEF(pd.read_csv(path), api_key, model)
|
283 |
+
desc = f"Table: {filename}\nColumns:\n{table_description}"
|
284 |
stored_table_descriptions.append(desc)
|
285 |
tables.append(filename)
|
286 |
+
|
287 |
+
# Update global variables
|
288 |
+
global table_description, tables_string, table_1, table_1_wt_xt
|
289 |
+
table_description = "\n".join(stored_table_descriptions)
|
290 |
tables_string = join_with_and(tables)
|
291 |
+
table_1 = tables[0] if tables else ""
|
292 |
+
table_1_wt_xt = table_1.split('.')[0] if table_1 else ""
|
293 |
+
|
294 |
+
return "\n".join(stored_table_descriptions)
|
|
|
|
|
|
|
295 |
|
296 |
def user_prompt_sanitization(user_prompt:str)->str:
|
297 |
guide = """
|
|
|
376 |
upload_output = gr.Textbox(label="Upload Status", lines=5)
|
377 |
|
378 |
upload_button.click(upload_file, inputs=file_output, outputs=upload_output)
|
379 |
+
|
380 |
with gr.Tab("Query Interface"):
|
381 |
chatbot = gr.Chatbot()
|
382 |
with gr.Row():
|
|
|
384 |
submit_button = gr.Button("Submit")
|
385 |
submit_button.click(queryModel, inputs=[user_input], outputs=chatbot)
|
386 |
|
387 |
+
demo.launch()
|
|
|
|
|
388 |
|
389 |
|