# Spaces: Sleeping  (page-status residue from where this file was captured; not part of the program)
# app/models/train_models.py
"""Train and persist the HR analytics models from the Supabase ``HR_analysis`` table.

The whole pipeline runs at module level: importing or executing this file
fetches the table, encodes the categorical columns, fits the models and
writes them under ``models/``.
"""
import os
import sys

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder

# Add backend directory to PYTHONPATH (the parent of this file's folder) so
# the project-local ``database`` module can be imported below.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))

from database import supabase  # noqa: E402  (must follow the sys.path tweak)

# Fetch data from Supabase
response = supabase.table("HR_analysis").select("*").execute()
df = pd.DataFrame(response.data) if response.data else pd.DataFrame()

# Fail fast with a clear message: an empty frame has no columns, so the
# encoding step would otherwise die with an opaque KeyError.
if df.empty:
    raise RuntimeError(
        "Supabase table 'HR_analysis' returned no rows; nothing to train on."
    )
# Encode categorical data
# Each column gets its own LabelEncoder. Re-fitting a single shared encoder
# replaces its learned classes on every call, so only the mapping for the
# last column would survive and the other columns could never be decoded.
# 'Training Type' stays last so that ``label_enc`` ends up as the same
# encoder the shared instance used to end up as.
categorical_columns = [
    'Performance Score',
    'EmployeeStatus',
    'Training Outcome',
    'Training Type',
]
label_encoders = {}
for _column in categorical_columns:
    label_enc = LabelEncoder()
    df[_column] = label_enc.fit_transform(df[_column])
    label_encoders[_column] = label_enc

# Save label encoders
# joblib.dump does not create missing directories, so make sure it exists.
os.makedirs('models', exist_ok=True)
# Legacy artifact, kept for existing consumers: the encoder fitted on
# 'Training Type' (what this file has always contained).
joblib.dump(label_enc, 'models/label_encoder.pkl')
# Full mapping: column name -> fitted LabelEncoder, for decoding any column.
joblib.dump(label_encoders, 'models/label_encoders.pkl')
# Prepare training data
# Each model gets a feature frame (X_*) and a target series (y_*) taken from
# the encoded ``df``; the column lists are named so the inputs of every model
# can be read at a glance.
satisfaction_features = ['Engagement Score', 'Work-Life Balance Score', 'Performance Score']
performance_features = ['Satisfaction Score', 'Engagement Score', 'Training Duration(Days)', 'Training Cost']
retention_features = ['Satisfaction Score', 'Engagement Score', 'Performance Score']
training_features = ['Training Type', 'Training Duration(Days)', 'Training Cost']

# Satisfaction score predicted from engagement, work-life balance, performance.
X_satisfaction, y_satisfaction = df[satisfaction_features], df['Satisfaction Score']

# Current employee rating predicted from satisfaction, engagement, training.
X_performance, y_performance = df[performance_features], df['Current Employee Rating']

# Employee status predicted from satisfaction, engagement, performance.
X_retention, y_retention = df[retention_features], df['EmployeeStatus']

# Training outcome predicted from training type, duration and cost.
X_training, y_training = df[training_features], df['Training Outcome']
# Train and Save Models
print("Training models...")

# joblib.dump does not create missing directories; make sure the output
# folder exists before writing any model file.
os.makedirs('models', exist_ok=True)

# Linear Regression Models
# Satisfaction score regressor.
satisfaction_model = LinearRegression()
satisfaction_model.fit(X_satisfaction, y_satisfaction)
joblib.dump(satisfaction_model, 'models/satisfaction_model.pkl')

# Current-employee-rating regressor.
performance_model = LinearRegression()
performance_model.fit(X_performance, y_performance)
joblib.dump(performance_model, 'models/performance_model.pkl')

# Classification Models
# Employee status classifier.
retention_model = RandomForestClassifier()
retention_model.fit(X_retention, y_retention)
joblib.dump(retention_model, 'models/retention_model.pkl')

# Training outcome classifier.
training_model = RandomForestClassifier()
training_model.fit(X_training, y_training)
joblib.dump(training_model, 'models/training_model.pkl')

# The leading glyph was mis-decoded in the captured file; restored as a check mark.
print("✅ Models trained and saved successfully!")