# app/models/train_models.py
"""Train the HR analytics models and persist them under ``models/``.

Running this module fetches every row of the ``HR_analysis`` Supabase table,
label-encodes the categorical columns, fits two linear regressions and two
random-forest classifiers, and writes each fitted object with ``joblib``.

Files written (relative to the current working directory):
    models/label_encoders.pkl   dict: column name -> fitted LabelEncoder
    models/label_encoder.pkl    legacy file, encoder for 'Training Type' only
    models/satisfaction_model.pkl
    models/performance_model.pkl
    models/retention_model.pkl
    models/training_model.pkl

Raises:
    RuntimeError: if the ``HR_analysis`` query returns no rows.
"""
import os
import sys

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder

# Add backend directory to PYTHONPATH
# (must run before the `database` import below so that module can be found)
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))

from database import supabase  # noqa: E402

# Fetch data from Supabase
response = supabase.table("HR_analysis").select("*").execute()
df = pd.DataFrame(response.data) if response.data else pd.DataFrame()

# Fail early with a clear message instead of a KeyError on the first column
# lookup when the table is empty or the query returned nothing.
if df.empty:
    raise RuntimeError(
        "No rows returned from the 'HR_analysis' table; cannot train models."
    )

# joblib.dump does not create missing directories.
os.makedirs('models', exist_ok=True)

# Encode categorical data.
# Each column gets its OWN encoder: LabelEncoder.fit_transform replaces the
# learned classes on every call, so one shared instance would only remember
# the mapping of the last column it was fitted on.
categorical_columns = [
    'Performance Score',
    'EmployeeStatus',
    'Training Outcome',
    'Training Type',
]
label_encoders = {}
for column in categorical_columns:
    encoder = LabelEncoder()
    df[column] = encoder.fit_transform(df[column])
    label_encoders[column] = encoder

# Save label encoders (one per column, keyed by column name).
joblib.dump(label_encoders, 'models/label_encoders.pkl')

# Backward compatibility: the previous version of this script wrote a single
# encoder to this path, which ended up fitted on 'Training Type' (the last
# column encoded). Keep writing the same content for existing loaders.
label_enc = label_encoders['Training Type']
joblib.dump(label_enc, 'models/label_encoder.pkl')

# Prepare training data
X_satisfaction = df[['Engagement Score', 'Work-Life Balance Score', 'Performance Score']]
y_satisfaction = df['Satisfaction Score']

X_performance = df[['Satisfaction Score', 'Engagement Score', 'Training Duration(Days)', 'Training Cost']]
y_performance = df['Current Employee Rating']

X_retention = df[['Satisfaction Score', 'Engagement Score', 'Performance Score']]
y_retention = df['EmployeeStatus']

X_training = df[['Training Type', 'Training Duration(Days)', 'Training Cost']]
y_training = df['Training Outcome']

# Train and Save Models
print("Training models...")

# Linear Regression Models
satisfaction_model = LinearRegression()
satisfaction_model.fit(X_satisfaction, y_satisfaction)
joblib.dump(satisfaction_model, 'models/satisfaction_model.pkl')

performance_model = LinearRegression()
performance_model.fit(X_performance, y_performance)
joblib.dump(performance_model, 'models/performance_model.pkl')

# Classification Models
retention_model = RandomForestClassifier()
retention_model.fit(X_retention, y_retention)
joblib.dump(retention_model, 'models/retention_model.pkl')

training_model = RandomForestClassifier()
training_model.fit(X_training, y_training)
joblib.dump(training_model, 'models/training_model.pkl')

print("✅ Models trained and saved successfully!")