Caleb Fahlgren committed on
Commit
44cb622
·
1 Parent(s): a00be78

create view and get ddl for parquet

Browse files
Files changed (1) hide show
  1. app.py +24 -7
app.py CHANGED
@@ -11,7 +11,9 @@ hf_api = HfApi()
11
  conn = duckdb.connect()
12
 
13
 
14
- def query_dataset(dataset_id: str, query: str) -> pd.DataFrame:
 
 
15
  response = requests.get(f"{BASE_DATASETS_SERVER_URL}/parquet?dataset={dataset_id}")
16
  response.raise_for_status() # Check if the request was successful
17
 
@@ -21,10 +23,24 @@ def query_dataset(dataset_id: str, query: str) -> pd.DataFrame:
21
  if not first_parquet_url:
22
  raise ValueError("No valid URL found for the first parquet file.")
23
 
24
- sql_query = f"SELECT * FROM read_parquet('{first_parquet_url}') limit 100;"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- df = conn.execute(sql_query).fetchdf()
27
- return df
28
 
29
 
30
  with gr.Blocks() as demo:
@@ -39,11 +55,12 @@ with gr.Blocks() as demo:
39
 
40
  btn = gr.Button("Ask 🪄")
41
  df = gr.DataFrame(datatype="markdown")
 
42
 
43
  btn.click(
44
- query_dataset,
45
- inputs=[dataset_name, query_input],
46
- outputs=[df],
47
  )
48
 
49
 
 
11
  conn = duckdb.connect()
12
 
13
 
14
+ def get_dataset_ddl(dataset_id: str) -> pd.DataFrame:
15
+ view_name = "dataset_view"
16
+
17
  response = requests.get(f"{BASE_DATASETS_SERVER_URL}/parquet?dataset={dataset_id}")
18
  response.raise_for_status() # Check if the request was successful
19
 
 
23
  if not first_parquet_url:
24
  raise ValueError("No valid URL found for the first parquet file.")
25
 
26
+ conn.execute(
27
+ f"CREATE OR REPLACE VIEW {view_name} as SELECT * FROM read_parquet('{first_parquet_url}');"
28
+ )
29
+ dataset_ddl = conn.execute(f"PRAGMA table_info('{view_name}');").fetchall()
30
+
31
+ column_data_types = ",\n\t".join(
32
+ [f"{column[1]} {column[2]}" for column in dataset_ddl]
33
+ )
34
+
35
+ sql_ddl = """
36
+ CREATE TABLE {} (
37
+ {}
38
+ );
39
+ """.format(
40
+ view_name, column_data_types
41
+ )
42
 
43
+ return sql_ddl
 
44
 
45
 
46
  with gr.Blocks() as demo:
 
55
 
56
  btn = gr.Button("Ask 🪄")
57
  df = gr.DataFrame(datatype="markdown")
58
+ ddl = gr.Text("")
59
 
60
  btn.click(
61
+ get_dataset_ddl,
62
+ inputs=[dataset_name],
63
+ outputs=[ddl],
64
  )
65
 
66