nhosseini committed on
Commit
b94eb4c
·
verified ·
1 Parent(s): 81034d0

Update app.py

Browse files

update preprocess

Files changed (1) hide show
  1. app.py +7 -9
app.py CHANGED
@@ -46,7 +46,7 @@ def process_table_query(query, table_data):
46
  # If the results are not numerical, return the joined string
47
  return ' '.join(results)
48
 
49
- # Gradio interface
50
  def answer_query_from_csv(query, file):
51
  """
52
  Function to handle file input and return model results.
@@ -54,15 +54,12 @@ def answer_query_from_csv(query, file):
54
  # Read the file into a DataFrame
55
  table_data = pd.read_csv(file)
56
 
57
- # Convert object-type columns to lowercase (if they are valid strings)
58
  for column in table_data.columns:
59
- if table_data[column].dtype == 'object':
60
  table_data[column] = table_data[column].apply(lambda x: x.lower() if isinstance(x, str) else x)
61
 
62
- # Convert all table cells to strings for TAPEX compatibility
63
- table_data = table_data.astype(str)
64
-
65
- # Extract year, month, day, and time components for datetime columns
66
  for column in table_data.columns:
67
  if pd.api.types.is_datetime64_any_dtype(table_data[column]):
68
  table_data[f'{column}_year'] = table_data[column].dt.year
@@ -70,15 +67,16 @@ def answer_query_from_csv(query, file):
70
  table_data[f'{column}_day'] = table_data[column].dt.day
71
  table_data[f'{column}_time'] = table_data[column].dt.strftime('%H:%M:%S')
72
 
73
- # Process the CSV file and query using TAPEX
74
  result_tapex = process_table_query(query, table_data)
75
-
76
  # Process the query using TAPAS pipelines
77
  result_tapas = pipe_tapas(table=table_data, query=query)['cells'][0]
78
  result_tapas2 = pipe_tapas2(table=table_data, query=query)['cells'][0]
79
 
80
  return result_tapex, result_tapas, result_tapas2
81
 
 
82
  # Create Gradio interface
83
  with gr.Blocks() as interface:
84
  gr.Markdown("# Table Question Answering with TAPEX and TAPAS Models")
 
46
  # If the results are not numerical, return the joined string
47
  return ' '.join(results)
48
 
49
+
50
  def answer_query_from_csv(query, file):
51
  """
52
  Function to handle file input and return model results.
 
54
  # Read the file into a DataFrame
55
  table_data = pd.read_csv(file)
56
 
57
+ # Convert object-type columns (text) to lowercase, leaving numeric columns as is
58
  for column in table_data.columns:
59
+ if table_data[column].dtype == 'object': # Only apply to text columns
60
  table_data[column] = table_data[column].apply(lambda x: x.lower() if isinstance(x, str) else x)
61
 
62
+ # Handle datetime columns to extract components
 
 
 
63
  for column in table_data.columns:
64
  if pd.api.types.is_datetime64_any_dtype(table_data[column]):
65
  table_data[f'{column}_year'] = table_data[column].dt.year
 
67
  table_data[f'{column}_day'] = table_data[column].dt.day
68
  table_data[f'{column}_time'] = table_data[column].dt.strftime('%H:%M:%S')
69
 
70
+ # Now process the table and query
71
  result_tapex = process_table_query(query, table_data)
72
+
73
  # Process the query using TAPAS pipelines
74
  result_tapas = pipe_tapas(table=table_data, query=query)['cells'][0]
75
  result_tapas2 = pipe_tapas2(table=table_data, query=query)['cells'][0]
76
 
77
  return result_tapex, result_tapas, result_tapas2
78
 
79
+
80
  # Create Gradio interface
81
  with gr.Blocks() as interface:
82
  gr.Markdown("# Table Question Answering with TAPEX and TAPAS Models")