blesspearl committed on
Commit
917d212
·
verified ·
1 Parent(s): 312dd70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -44
app.py CHANGED
@@ -12,6 +12,9 @@ from dotenv import load_dotenv
12
  load_dotenv()
13
  userdata = os.environ
14
 
 
 
 
15
 
16
  def chat_with_groq(client:groq.Groq,
17
  prompt:str,
@@ -46,29 +49,19 @@ def chat_with_groq(client:groq.Groq,
46
  # logger.info(f"Completion: {completion}")
47
  return completion.choices[0].message.content
48
 
49
- def execute_duckdb_query(query:str)->pd.DataFrame:
50
- """
51
- Execute a DuckDB query and return the result as a pandas DataFrame.
52
-
53
- Args:
54
- query (str): The DuckDB query to execute.
55
-
56
- Returns:
57
- pd.DataFrame: The result of the query as a pandas DataFrame.
58
- """
59
- original_cwd = os.getcwd()
60
- print(f"PATH:{original_cwd}")
61
- os.chdir('data')
62
- print(f"PATH:{os.getcwd()}")
63
-
64
  try:
65
  conn = duckdb.connect(database=":memory:", read_only=False)
 
 
 
 
 
 
66
  query_result = conn.execute(query).fetch_df().reset_index()
67
- os.chdir(original_cwd)
68
  return query_result
69
  except Exception as e:
70
- print(f"Error: {e}")
71
- os.chdir(original_cwd)
72
  raise e
73
  def get_summarization(client:groq.Groq,
74
  use_question:str,
@@ -258,10 +251,6 @@ base_prompt = """
258
  * Ensure that the entire output is returned on only one single line
259
  * Keep your query as simple and straightforward as possible; do not use subqueries
260
  """
261
- table_description = """"""
262
- tables_string = """"""
263
- table_1 = """"""
264
- table_1_wt_xt = """"""
265
  user_question = """"""
266
 
267
  # And some rules for querying the dataset:
@@ -272,38 +261,37 @@ user_question = """"""
272
 
273
 
274
  def upload_file(files) -> List[str]:
275
- # will have to change once the private system is initialized
276
  model = "llama3-8b-8192"
277
- api_key:str=userdata.get("GROQ_API_KEY")
278
- data_dir = Path("data")
279
- data_dir.mkdir(parents=True, exist_ok=True)
280
- if type(files) == str:
281
  files = [files]
 
282
  stored_paths = []
283
  stored_table_descriptions = []
284
  tables = []
 
285
  for file in files:
286
  filename = Path(file.name).name
287
- path = data_dir / filename
288
 
289
  # Copy the content of the temporary file to our destination
290
- with open(file.name, "rb") as source, open(path, "wb") as destination:
291
- destination.write(source.read())
292
 
293
- stored_paths.append(str(path.absolute()))
294
- table_description = identify_column_datatypes_to_SQL_DEF(pd.read_csv(path),api_key,model)
295
- desc = "Table: " + filename + "\n Columns:\n" + table_description
296
  stored_table_descriptions.append(desc)
297
  tables.append(filename)
298
- # constructing a string
 
 
 
299
  tables_string = join_with_and(tables)
300
- final = "\n".join(stored_table_descriptions)
301
- table_1_wt_xt = tables[0].split('.')[0]
302
- table_description = final
303
- tables_string = tables_string
304
- table_1 = tables[0]
305
- table_1_wt_xt = table_1_wt_xt
306
- return final
307
 
308
  def user_prompt_sanitization(user_prompt:str)->str:
309
  guide = """
@@ -388,6 +376,7 @@ with gr.Blocks() as demo:
388
  upload_output = gr.Textbox(label="Upload Status", lines=5)
389
 
390
  upload_button.click(upload_file, inputs=file_output, outputs=upload_output)
 
391
  with gr.Tab("Query Interface"):
392
  chatbot = gr.Chatbot()
393
  with gr.Row():
@@ -395,8 +384,6 @@ with gr.Blocks() as demo:
395
  submit_button = gr.Button("Submit")
396
  submit_button.click(queryModel, inputs=[user_input], outputs=chatbot)
397
 
398
-
399
-
400
- demo.launch(share=True)
401
 
402
 
 
12
  load_dotenv()
13
  userdata = os.environ
14
 
15
+ DATA_DIR = Path(os.getcwd()) / "data"
16
+ DATA_DIR.mkdir(parents=True, exist_ok=True)
17
+
18
 
19
  def chat_with_groq(client:groq.Groq,
20
  prompt:str,
 
49
  # logger.info(f"Completion: {completion}")
50
  return completion.choices[0].message.content
51
 
52
+ def execute_duckdb_query(query: str) -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  try:
54
  conn = duckdb.connect(database=":memory:", read_only=False)
55
+
56
+ # Load all CSV files from the data directory
57
+ for csv_file in DATA_DIR.glob("*.csv"):
58
+ table_name = csv_file.stem
59
+ conn.execute(f"CREATE TABLE {table_name} AS SELECT * FROM read_csv_auto('{csv_file}')")
60
+
61
  query_result = conn.execute(query).fetch_df().reset_index()
 
62
  return query_result
63
  except Exception as e:
64
+ print(f"Error executing query: {e}")
 
65
  raise e
66
  def get_summarization(client:groq.Groq,
67
  use_question:str,
 
251
  * Ensure that the entire output is returned on only one single line
252
  * Keep your query as simple and straightforward as possible; do not use subqueries
253
  """
 
 
 
 
254
  user_question = """"""
255
 
256
  # And some rules for querying the dataset:
 
261
 
262
 
263
  def upload_file(files) -> List[str]:
 
264
  model = "llama3-8b-8192"
265
+ api_key: str = userdata.get("GROQ_API_KEY")
266
+
267
+ if isinstance(files, str):
 
268
  files = [files]
269
+
270
  stored_paths = []
271
  stored_table_descriptions = []
272
  tables = []
273
+
274
  for file in files:
275
  filename = Path(file.name).name
276
+ path = DATA_DIR / filename
277
 
278
  # Copy the content of the temporary file to our destination
279
+ shutil.copy2(file.name, path)
 
280
 
281
+ stored_paths.append(str(path))
282
+ table_description = identify_column_datatypes_to_SQL_DEF(pd.read_csv(path), api_key, model)
283
+ desc = f"Table: {filename}\nColumns:\n{table_description}"
284
  stored_table_descriptions.append(desc)
285
  tables.append(filename)
286
+
287
+ # Update global variables
288
+ global table_description, tables_string, table_1, table_1_wt_xt
289
+ table_description = "\n".join(stored_table_descriptions)
290
  tables_string = join_with_and(tables)
291
+ table_1 = tables[0] if tables else ""
292
+ table_1_wt_xt = table_1.split('.')[0] if table_1 else ""
293
+
294
+ return "\n".join(stored_table_descriptions)
 
 
 
295
 
296
  def user_prompt_sanitization(user_prompt:str)->str:
297
  guide = """
 
376
  upload_output = gr.Textbox(label="Upload Status", lines=5)
377
 
378
  upload_button.click(upload_file, inputs=file_output, outputs=upload_output)
379
+
380
  with gr.Tab("Query Interface"):
381
  chatbot = gr.Chatbot()
382
  with gr.Row():
 
384
  submit_button = gr.Button("Submit")
385
  submit_button.click(queryModel, inputs=[user_input], outputs=chatbot)
386
 
387
+ demo.launch()
 
 
388
 
389