cap_backend / models /train_models.py
logeswari's picture
commit the changes
efcc6b0
# app/models/train_models.py
import sys
import os
# Add the parent of this file's directory to sys.path (presumably so the `database` import below resolves)
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import joblib
import os
# Fetch data from Supabase
from database import supabase

# Select every column of every row in the HR_analysis table; the rest of the
# script trains on this frame.
response = supabase.table("HR_analysis").select("*").execute()
df = pd.DataFrame(response.data) if response.data else pd.DataFrame()
if df.empty:
    # An empty response yields a frame with no columns, so the first column
    # lookup further down would fail with an opaque KeyError. Stop here with
    # a message that names the actual problem instead.
    raise RuntimeError(
        "No rows returned from Supabase table 'HR_analysis'; cannot train models."
    )
# Encode categorical data
# Every categorical column gets its own LabelEncoder. fit_transform() replaces
# an encoder's classes_ on each call, so sharing one encoder across columns
# leaves it able to map only the column fitted last; the mappings of the other
# columns would be lost and could not be reproduced at prediction time.
CATEGORICAL_COLUMNS = [
    'Performance Score',
    'EmployeeStatus',
    'Training Outcome',
    'Training Type',
]
label_encoders = {}
for column in CATEGORICAL_COLUMNS:
    encoder = LabelEncoder()
    df[column] = encoder.fit_transform(df[column])
    label_encoders[column] = encoder

# Backward compatibility: 'models/label_encoder.pkl' keeps holding the encoder
# fitted on 'Training Type', so existing consumers of that file see no change.
label_enc = label_encoders['Training Type']

# Save label encoders
os.makedirs('models', exist_ok=True)  # joblib.dump does not create directories
joblib.dump(label_enc, 'models/label_encoder.pkl')
# Complete {column name: fitted encoder} mapping for all encoded columns.
joblib.dump(label_encoders, 'models/label_encoders.pkl')
# Prepare training data
# One feature-column list per model; each X/y pair below is selected from the
# already-encoded frame `df`.
satisfaction_features = ['Engagement Score', 'Work-Life Balance Score', 'Performance Score']
performance_features = ['Satisfaction Score', 'Engagement Score', 'Training Duration(Days)', 'Training Cost']
retention_features = ['Satisfaction Score', 'Engagement Score', 'Performance Score']
training_features = ['Training Type', 'Training Duration(Days)', 'Training Cost']

# Satisfaction score from engagement, work-life balance and performance.
X_satisfaction, y_satisfaction = df[satisfaction_features], df['Satisfaction Score']
# Current employee rating from satisfaction, engagement and training effort.
X_performance, y_performance = df[performance_features], df['Current Employee Rating']
# Employee status (label-encoded) from satisfaction, engagement and performance.
X_retention, y_retention = df[retention_features], df['EmployeeStatus']
# Training outcome (label-encoded) from training type, duration and cost.
X_training, y_training = df[training_features], df['Training Outcome']
# Train and Save Models
def _fit_and_save(estimator, features, target, path):
    """Fit *estimator* on *features*/*target*, dump it to *path*, and return it.

    Fitting and dumping happen back to back so that a model is written to
    disk as soon as it is trained, before the next one starts.
    """
    estimator.fit(features, target)
    joblib.dump(estimator, path)
    return estimator


print("Training models...")

# Linear Regression Models
satisfaction_model = _fit_and_save(
    LinearRegression(), X_satisfaction, y_satisfaction,
    'models/satisfaction_model.pkl',
)
performance_model = _fit_and_save(
    LinearRegression(), X_performance, y_performance,
    'models/performance_model.pkl',
)

# Classification Models
# NOTE(review): no random_state is passed, so the forests differ between runs;
# pin one if reproducible artifacts are required.
retention_model = _fit_and_save(
    RandomForestClassifier(), X_retention, y_retention,
    'models/retention_model.pkl',
)
training_model = _fit_and_save(
    RandomForestClassifier(), X_training, y_training,
    'models/training_model.pkl',
)

# The check-mark was previously stored as mis-decoded bytes and printed as
# garbage characters; it is restored to the intended emoji here.
print("✅ Models trained and saved successfully!")