ProfessorLeVesseur committed on
Commit
907c533
·
verified ·
1 Parent(s): fc8abe1

Update data_processor.py

Browse files
Files changed (1) hide show
  1. data_processor.py +15 -4
data_processor.py CHANGED
@@ -184,14 +184,16 @@
184
 
185
 
186
 
187
-
188
  import re
189
  import pandas as pd
190
  import os
191
  from huggingface_hub import InferenceClient
192
 
193
  class DataProcessor:
194
- INTERVENTION_COLUMN = 'Did the intervention happen today?'
 
 
 
195
  ENGAGED_STR = 'Engaged'
196
  PARTIALLY_ENGAGED_STR = 'Partially Engaged'
197
  NOT_ENGAGED_STR = 'Not Engaged'
@@ -202,7 +204,7 @@ class DataProcessor:
202
  raise ValueError("HF_API_KEY not set in environment variables")
203
  self.client = InferenceClient(api_key=self.hf_api_key)
204
  self.student_metrics_df = student_metrics_df
205
-
206
 
207
  def read_excel(self, uploaded_file):
208
  return pd.read_excel(uploaded_file)
@@ -256,9 +258,17 @@ class DataProcessor:
256
  df.columns = updated_columns
257
  return df
258
 
 
 
 
 
 
 
 
259
  def compute_intervention_statistics(self, df):
 
260
  total_days = len(df)
261
- sessions_held = df[self.INTERVENTION_COLUMN].str.strip().str.lower().eq('yes').sum()
262
  intervention_frequency = (sessions_held / total_days) * 100 if total_days > 0 else 0
263
  return pd.DataFrame({
264
  'Intervention Dosage (%)': [round(intervention_frequency, 0)],
@@ -266,6 +276,7 @@ class DataProcessor:
266
  'Intervention Sessions Not Held': [total_days - sessions_held],
267
  'Total Number of Days Available': [total_days]
268
  })
 
269
 
270
  def classify_engagement(self, engagement_str):
271
  engagement_str = engagement_str.lower()
 
184
 
185
 
186
 
 
187
  import re
188
  import pandas as pd
189
  import os
190
  from huggingface_hub import InferenceClient
191
 
192
  class DataProcessor:
193
+ INTERVENTION_COLUMN_OPTIONS = [
194
+ 'Did the intervention happen today?',
195
+ 'Did the Intervention Take Place Today?'
196
+ ]
197
  ENGAGED_STR = 'Engaged'
198
  PARTIALLY_ENGAGED_STR = 'Partially Engaged'
199
  NOT_ENGAGED_STR = 'Not Engaged'
 
204
  raise ValueError("HF_API_KEY not set in environment variables")
205
  self.client = InferenceClient(api_key=self.hf_api_key)
206
  self.student_metrics_df = student_metrics_df
207
+ self.intervention_column = None # Will be set when processing data
208
 
209
  def read_excel(self, uploaded_file):
210
  return pd.read_excel(uploaded_file)
 
258
  df.columns = updated_columns
259
  return df
260
 
261
+ def find_intervention_column(self, df):
262
+ for column in self.INTERVENTION_COLUMN_OPTIONS:
263
+ if column in df.columns:
264
+ self.intervention_column = column
265
+ return column
266
+ raise ValueError("No intervention column found in the dataframe.")
267
+
268
  def compute_intervention_statistics(self, df):
269
+ intervention_column = self.find_intervention_column(df)
270
  total_days = len(df)
271
+ sessions_held = df[intervention_column].str.strip().str.lower().eq('yes').sum()
272
  intervention_frequency = (sessions_held / total_days) * 100 if total_days > 0 else 0
273
  return pd.DataFrame({
274
  'Intervention Dosage (%)': [round(intervention_frequency, 0)],
 
276
  'Intervention Sessions Not Held': [total_days - sessions_held],
277
  'Total Number of Days Available': [total_days]
278
  })
279
+
280
 
281
  def classify_engagement(self, engagement_str):
282
  engagement_str = engagement_str.lower()