Update data_processor.py
Browse files- data_processor.py +15 -4
data_processor.py
CHANGED
@@ -184,14 +184,16 @@
|
|
184 |
|
185 |
|
186 |
|
187 |
-
|
188 |
import re
|
189 |
import pandas as pd
|
190 |
import os
|
191 |
from huggingface_hub import InferenceClient
|
192 |
|
193 |
class DataProcessor:
|
194 |
-
|
|
|
|
|
|
|
195 |
ENGAGED_STR = 'Engaged'
|
196 |
PARTIALLY_ENGAGED_STR = 'Partially Engaged'
|
197 |
NOT_ENGAGED_STR = 'Not Engaged'
|
@@ -202,7 +204,7 @@ class DataProcessor:
|
|
202 |
raise ValueError("HF_API_KEY not set in environment variables")
|
203 |
self.client = InferenceClient(api_key=self.hf_api_key)
|
204 |
self.student_metrics_df = student_metrics_df
|
205 |
-
|
206 |
|
207 |
def read_excel(self, uploaded_file):
|
208 |
return pd.read_excel(uploaded_file)
|
@@ -256,9 +258,17 @@ class DataProcessor:
|
|
256 |
df.columns = updated_columns
|
257 |
return df
|
258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
259 |
def compute_intervention_statistics(self, df):
|
|
|
260 |
total_days = len(df)
|
261 |
-
sessions_held = df[
|
262 |
intervention_frequency = (sessions_held / total_days) * 100 if total_days > 0 else 0
|
263 |
return pd.DataFrame({
|
264 |
'Intervention Dosage (%)': [round(intervention_frequency, 0)],
|
@@ -266,6 +276,7 @@ class DataProcessor:
|
|
266 |
'Intervention Sessions Not Held': [total_days - sessions_held],
|
267 |
'Total Number of Days Available': [total_days]
|
268 |
})
|
|
|
269 |
|
270 |
def classify_engagement(self, engagement_str):
|
271 |
engagement_str = engagement_str.lower()
|
|
|
184 |
|
185 |
|
186 |
|
|
|
187 |
import re
|
188 |
import pandas as pd
|
189 |
import os
|
190 |
from huggingface_hub import InferenceClient
|
191 |
|
192 |
class DataProcessor:
|
193 |
+
INTERVENTION_COLUMN_OPTIONS = [
|
194 |
+
'Did the intervention happen today?',
|
195 |
+
'Did the Intervention Take Place Today?'
|
196 |
+
]
|
197 |
ENGAGED_STR = 'Engaged'
|
198 |
PARTIALLY_ENGAGED_STR = 'Partially Engaged'
|
199 |
NOT_ENGAGED_STR = 'Not Engaged'
|
|
|
204 |
raise ValueError("HF_API_KEY not set in environment variables")
|
205 |
self.client = InferenceClient(api_key=self.hf_api_key)
|
206 |
self.student_metrics_df = student_metrics_df
|
207 |
+
self.intervention_column = None # Will be set when processing data
|
208 |
|
209 |
def read_excel(self, uploaded_file):
|
210 |
return pd.read_excel(uploaded_file)
|
|
|
258 |
df.columns = updated_columns
|
259 |
return df
|
260 |
|
261 |
+
def find_intervention_column(self, df):
|
262 |
+
for column in self.INTERVENTION_COLUMN_OPTIONS:
|
263 |
+
if column in df.columns:
|
264 |
+
self.intervention_column = column
|
265 |
+
return column
|
266 |
+
raise ValueError("No intervention column found in the dataframe.")
|
267 |
+
|
268 |
def compute_intervention_statistics(self, df):
|
269 |
+
intervention_column = self.find_intervention_column(df)
|
270 |
total_days = len(df)
|
271 |
+
sessions_held = df[intervention_column].str.strip().str.lower().eq('yes').sum()
|
272 |
intervention_frequency = (sessions_held / total_days) * 100 if total_days > 0 else 0
|
273 |
return pd.DataFrame({
|
274 |
'Intervention Dosage (%)': [round(intervention_frequency, 0)],
|
|
|
276 |
'Intervention Sessions Not Held': [total_days - sessions_held],
|
277 |
'Total Number of Days Available': [total_days]
|
278 |
})
|
279 |
+
|
280 |
|
281 |
def classify_engagement(self, engagement_str):
|
282 |
engagement_str = engagement_str.lower()
|