Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,14 @@
|
|
1 |
import os
|
2 |
import joblib
|
3 |
import pandas as pd
|
4 |
-
import streamlit as st
|
5 |
-
from sklearn.model_selection import train_test_split
|
6 |
from sklearn.metrics import r2_score
|
7 |
from typing import List, Dict, Any
|
|
|
8 |
|
9 |
# Constants for directories and file names
|
10 |
MODEL_DIR = 'models'
|
11 |
DATA_DIR = 'datasets'
|
12 |
-
DATA_FILE = '
|
13 |
MODEL_NAMES = [
|
14 |
'CatBoost Regressor',
|
15 |
'XGBoost Regressor',
|
@@ -24,7 +23,7 @@ def load_models(model_names: List[str]) -> Dict[str, Any]:
|
|
24 |
try:
|
25 |
models[name] = joblib.load(path)
|
26 |
except Exception as e:
|
27 |
-
|
28 |
return models
|
29 |
|
30 |
# Load models
|
@@ -37,7 +36,6 @@ df = pd.read_csv(data_path)
|
|
37 |
# Prepare features and target
|
38 |
X = df.drop(columns=['Salary'])
|
39 |
y = df['Salary']
|
40 |
-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=123)
|
41 |
|
42 |
# Pre-defined input choices
|
43 |
input_choices = {
|
@@ -53,83 +51,44 @@ max_comp = float(df.CompTotal.max() * 1.5)
|
|
53 |
default_years = 3.0 # Default years of experience
|
54 |
max_years = float(df.YearsOfExperience.max() * 1.5)
|
55 |
|
56 |
-
|
57 |
-
y_train_predictions = {name: model.predict(X_train) for name, model in models.items()}
|
58 |
-
|
59 |
-
def load_and_predict(sample: pd.DataFrame) -> pd.DataFrame:
|
60 |
"""Predict salary using loaded models and evaluate statistics."""
|
|
|
|
|
|
|
|
|
|
|
61 |
results = []
|
62 |
|
63 |
for name, model in models.items():
|
64 |
try:
|
65 |
-
salary_pred = model.predict(
|
66 |
results.append({
|
67 |
'Model': name,
|
68 |
'Predicted Salary': salary_pred,
|
69 |
-
'R2 Score (%)': r2_score(
|
70 |
})
|
71 |
except Exception as e:
|
72 |
-
|
73 |
-
|
74 |
return pd.DataFrame(results).sort_values(by='R2 Score (%)', ascending=False).reset_index(drop=True)
|
75 |
|
76 |
-
#
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
remotework = st.sidebar.selectbox("**Remote Work**", options=input_choices['RemoteWork'])
|
86 |
-
comptotal = st.sidebar.number_input("**CompTotal**", min_value=0.0, max_value=max_comp, value=default_comp)
|
87 |
-
yearsofexperience = st.sidebar.number_input("**Years of Experience**", min_value=0.0, max_value=max_years, value=default_years)
|
88 |
-
|
89 |
-
# Handling predictions
|
90 |
-
if st.sidebar.button(label=':rainbow[Predict Salary]'):
|
91 |
-
input_data = pd.DataFrame(
|
92 |
-
[[mainbranch, country, educationlevel, remotework, comptotal, yearsofexperience]],
|
93 |
-
columns=['MainBranch', 'Country', 'EducationLevel', 'RemoteWork', 'CompTotal', 'YearsOfExperience'])
|
94 |
-
|
95 |
-
results_df = load_and_predict(input_data)
|
96 |
-
|
97 |
-
if not results_df.empty:
|
98 |
-
st.write("### Prediction Results:")
|
99 |
-
st.dataframe(results_df)
|
100 |
|
101 |
-
|
102 |
-
st.markdown("---")
|
103 |
-
st.text('''
|
104 |
-
>> Developer Salary Prediction App <<
|
105 |
-
This Streamlit application predicts developer salary using multiple machine learning models including LGBM, XGBoost, and Random Forest regressors.
|
106 |
-
Users can input developer information through a user-friendly interface, which includes fields such as country, education level, and years of experience.
|
107 |
-
|
108 |
-
> Features:
|
109 |
-
**Input Components**:
|
110 |
-
- **MainBranch**: Select your main area of expertise in development, such as software engineering, data science, or web development. This selection may influence salary expectations based on the branch's demand and trends.
|
111 |
-
|
112 |
-
- **Country**: Choose your country from the dropdown list. Regions often exhibit varying salary scales due to economic factors, the cost of living, and market demand for tech workers.
|
113 |
-
|
114 |
-
- **Education Level**: Indicate the highest level of education you have completed. Higher educational qualifications often correlate with higher earning potential in the tech industry.
|
115 |
-
|
116 |
-
- **Remote Work**: Specify whether you primarily work remotely, on-site, or in a hybrid setup. Remote work setups can affect salary offers, especially if hiring companies are based in different geographic areas.
|
117 |
-
|
118 |
-
- **CompTotal**: Enter your expected total compensation, which includes salary, bonuses, and other benefits. This field is crucial for setting a base for salary predictions and facilitates comparisons.
|
119 |
-
|
120 |
-
- **Years of Experience**: Provide the number of years you've been in a coding-related job. Generally, more years of experience are associated with higher salaries due to skill accumulation and professional development.
|
121 |
-
|
122 |
-
**Data Processing**:
|
123 |
-
- The app employs a pre-processed dataset, cleaned and prepared for model training.
|
124 |
-
- It utilizes features including country, education level, and years of experience for predictions.
|
125 |
-
- Models are loaded from disk, obtaining predictions based on user-provided input.
|
126 |
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
This application serves educational purposes. Predictions are not guaranteed to be accurate.
|
135 |
-
''')
|
|
|
1 |
import os
|
2 |
import joblib
|
3 |
import pandas as pd
|
|
|
|
|
4 |
from sklearn.metrics import r2_score
|
5 |
from typing import List, Dict, Any
|
6 |
+
import gradio as gr
|
7 |
|
8 |
# Constants for directories and file names
|
9 |
MODEL_DIR = 'models'
|
10 |
DATA_DIR = 'datasets'
|
11 |
+
DATA_FILE = 'cleaned_survey_results_public_v2.csv'
|
12 |
MODEL_NAMES = [
|
13 |
'CatBoost Regressor',
|
14 |
'XGBoost Regressor',
|
|
|
23 |
try:
|
24 |
models[name] = joblib.load(path)
|
25 |
except Exception as e:
|
26 |
+
print(f"Error loading model {name}: {str(e)}") # Use print for logging in Gradio
|
27 |
return models
|
28 |
|
29 |
# Load models
|
|
|
36 |
# Prepare features and target
|
37 |
X = df.drop(columns=['Salary'])
|
38 |
y = df['Salary']
|
|
|
39 |
|
40 |
# Pre-defined input choices
|
41 |
input_choices = {
|
|
|
51 |
default_years = 3.0 # Default years of experience
|
52 |
max_years = float(df.YearsOfExperience.max() * 1.5)
|
53 |
|
54 |
+
# Column layout of the results table. Passing it explicitly keeps the frame
# well-formed (and sortable) even when no model produced a prediction.
_RESULT_COLUMNS = ['Model', 'Predicted Salary', 'R2 Score (%)']

# R2 score (in percent) per model name, filled lazily on first use. The score
# depends only on the module-level X / y and the loaded model, not on the
# request, so it is computed at most once per model.
_r2_cache: Dict[str, float] = {}


def _dataset_r2(name, model) -> float:
    """Return the model's R2 score (percent) on the module-level X / y, cached by name."""
    if name not in _r2_cache:
        # NOTE(review): X / y is the whole dataset; if the models were trained
        # on part of it this score is optimistic -- confirm this is intended.
        _r2_cache[name] = r2_score(y, model.predict(X)) * 100
    return _r2_cache[name]


def load_and_predict(mainbranch, country, educationlevel, remotework, comptotal, yearsofexperience) -> pd.DataFrame:
    """Predict salary with every loaded model and report each model's R2 score.

    The six arguments are placed into a single-row DataFrame with the columns
    'MainBranch', 'Country', 'EducationLevel', 'RemoteWork', 'CompTotal' and
    'YearsOfExperience', in that order, and passed to each model's
    ``predict``.

    Args:
        mainbranch: Value for the 'MainBranch' column.
        country: Value for the 'Country' column.
        educationlevel: Value for the 'EducationLevel' column.
        remotework: Value for the 'RemoteWork' column.
        comptotal: Value for the 'CompTotal' column.
        yearsofexperience: Value for the 'YearsOfExperience' column.

    Returns:
        A DataFrame with columns 'Model', 'Predicted Salary' and
        'R2 Score (%)', sorted by R2 score in descending order. A model whose
        prediction or scoring raises is logged and left out; if no model
        succeeds the frame is empty but still has the three columns.
    """
    input_data = pd.DataFrame(
        [[mainbranch, country, educationlevel, remotework, comptotal, yearsofexperience]],
        columns=['MainBranch', 'Country', 'EducationLevel', 'RemoteWork', 'CompTotal', 'YearsOfExperience'],
    )

    results = []
    for name, model in models.items():
        try:
            salary_pred = model.predict(input_data)[0]
            r2_percent = _dataset_r2(name, model)
        except Exception as e:
            # Best effort: one failing model must not hide the others' results.
            print(f"Error during prediction with model {name}: {str(e)}")  # Logging
            continue
        results.append({
            'Model': name,
            'Predicted Salary': salary_pred,
            'R2 Score (%)': r2_percent,
        })

    # Explicit columns: sort_values would raise KeyError on a column-less
    # frame when `results` is empty.
    return (
        pd.DataFrame(results, columns=_RESULT_COLUMNS)
        .sort_values(by='R2 Score (%)', ascending=False)
        .reset_index(drop=True)
    )
|
75 |
|
76 |
+
# Gradio interface
# Input widgets, listed in the same positional order as the parameters of
# load_and_predict (Gradio passes component values to `fn` positionally).
inputs = [
    gr.Dropdown(choices=input_choices['MainBranch'], label="Main Branch"),
    gr.Dropdown(choices=input_choices['Country'], label="Country"),
    gr.Dropdown(choices=input_choices['EducationLevel'], label="Education Level"),
    gr.Dropdown(choices=input_choices['RemoteWork'], label="Remote Work"),
    gr.Number(minimum=0.0, maximum=max_comp, value=default_comp, step=0.5, label="CompTotal"),
    # Use the data-derived upper bound (max_years) rather than a hard-coded
    # limit, matching how CompTotal is bounded by max_comp.
    gr.Number(minimum=0.0, maximum=max_years, value=default_years, step=0.5, label="Years of Experience"),
]

# Table that displays the DataFrame returned by load_and_predict.
output = gr.Dataframe(label="Prediction Results")

# Build the app and start serving it.
gr.Interface(
    fn=load_and_predict,
    inputs=inputs,
    outputs=output,
    title="Developer Salary Prediction App",
    description="This application predicts developer salaries using multiple machine learning models. Provide your details to get salary predictions.",
).launch()
|
|
|
|