AfshinMA committed on
Commit
3d769d6
·
verified ·
1 Parent(s): 2feac6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -71
app.py CHANGED
@@ -1,15 +1,14 @@
1
  import os
2
  import joblib
3
  import pandas as pd
4
- import streamlit as st
5
- from sklearn.model_selection import train_test_split
6
  from sklearn.metrics import r2_score
7
  from typing import List, Dict, Any
 
8
 
9
  # Constants for directories and file names
10
  MODEL_DIR = 'models'
11
  DATA_DIR = 'datasets'
12
- DATA_FILE = 'cleaned_survey_results_public.csv'
13
  MODEL_NAMES = [
14
  'CatBoost Regressor',
15
  'XGBoost Regressor',
@@ -24,7 +23,7 @@ def load_models(model_names: List[str]) -> Dict[str, Any]:
24
  try:
25
  models[name] = joblib.load(path)
26
  except Exception as e:
27
- st.error(f"Error loading model {name}: {str(e)}")
28
  return models
29
 
30
  # Load models
@@ -37,7 +36,6 @@ df = pd.read_csv(data_path)
37
  # Prepare features and target
38
  X = df.drop(columns=['Salary'])
39
  y = df['Salary']
40
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=123)
41
 
42
  # Pre-defined input choices
43
  input_choices = {
@@ -53,83 +51,44 @@ max_comp = float(df.CompTotal.max() * 1.5)
53
  default_years = 3.0 # Default years of experience
54
  max_years = float(df.YearsOfExperience.max() * 1.5)
55
 
56
- # Precompute predictions for training set
57
- y_train_predictions = {name: model.predict(X_train) for name, model in models.items()}
58
-
59
- def load_and_predict(sample: pd.DataFrame) -> pd.DataFrame:
60
  """Predict salary using loaded models and evaluate statistics."""
 
 
 
 
 
61
  results = []
62
 
63
  for name, model in models.items():
64
  try:
65
- salary_pred = model.predict(sample)[0]
66
  results.append({
67
  'Model': name,
68
  'Predicted Salary': salary_pred,
69
- 'R2 Score (%)': r2_score(y_train, y_train_predictions[name]) * 100,
70
  })
71
  except Exception as e:
72
- st.error(f"Error during prediction with model {name}: {str(e)}")
73
-
74
  return pd.DataFrame(results).sort_values(by='R2 Score (%)', ascending=False).reset_index(drop=True)
75
 
76
- # Streamlit UI setup
77
- st.set_page_config(page_title="Developer Salary Prediction App", page_icon="🤑", layout="wide")
78
- st.title("🤑 **Developer Salary Prediction**")
79
-
80
- # Sidebar inputs
81
- st.sidebar.header("Input Information")
82
- mainbranch = st.sidebar.selectbox("**MainBranch**", options=input_choices['MainBranch'])
83
- country = st.sidebar.selectbox("**Country**", options=input_choices['Country'])
84
- educationlevel = st.sidebar.selectbox("**Education Level**", options=input_choices['EducationLevel'])
85
- remotework = st.sidebar.selectbox("**Remote Work**", options=input_choices['RemoteWork'])
86
- comptotal = st.sidebar.number_input("**CompTotal**", min_value=0.0, max_value=max_comp, value=default_comp)
87
- yearsofexperience = st.sidebar.number_input("**Years of Experience**", min_value=0.0, max_value=max_years, value=default_years)
88
-
89
- # Handling predictions
90
- if st.sidebar.button(label=':rainbow[Predict Salary]'):
91
- input_data = pd.DataFrame(
92
- [[mainbranch, country, educationlevel, remotework, comptotal, yearsofexperience]],
93
- columns=['MainBranch', 'Country', 'EducationLevel', 'RemoteWork', 'CompTotal', 'YearsOfExperience'])
94
-
95
- results_df = load_and_predict(input_data)
96
-
97
- if not results_df.empty:
98
- st.write("### Prediction Results:")
99
- st.dataframe(results_df)
100
 
101
- # Disclaimer Section
102
- st.markdown("---")
103
- st.text('''
104
- >> Developer Salary Prediction App <<
105
- This Streamlit application predicts developer salary using multiple machine learning models including LGBM, XGBoost, and Random Forest regressors.
106
- Users can input developer information through a user-friendly interface, which includes fields such as country, education level, and years of experience.
107
-
108
- > Features:
109
- **Input Components**:
110
- - **MainBranch**: Select your main area of expertise in development, such as software engineering, data science, or web development. This selection may influence salary expectations based on the branch's demand and trends.
111
-
112
- - **Country**: Choose your country from the dropdown list. Regions often exhibit varying salary scales due to economic factors, the cost of living, and market demand for tech workers.
113
-
114
- - **Education Level**: Indicate the highest level of education you have completed. Higher educational qualifications often correlate with higher earning potential in the tech industry.
115
-
116
- - **Remote Work**: Specify whether you primarily work remotely, on-site, or in a hybrid setup. Remote work setups can affect salary offers, especially if hiring companies are based in different geographic areas.
117
-
118
- - **CompTotal**: Enter your expected total compensation, which includes salary, bonuses, and other benefits. This field is crucial for setting a base for salary predictions and facilitates comparisons.
119
-
120
- - **Years of Experience**: Provide the number of years you've been in a coding-related job. Generally, more years of experience are associated with higher salaries due to skill accumulation and professional development.
121
-
122
- **Data Processing**:
123
- - The app employs a pre-processed dataset, cleaned and prepared for model training.
124
- - It utilizes features including country, education level, and years of experience for predictions.
125
- - Models are loaded from disk, obtaining predictions based on user-provided input.
126
 
127
- **Prediction**: The app performs predictions with loaded models and calculates performance metrics like R2 score.
128
- **Results Display**: The predicted salary and model performance metrics are presented in a user-friendly format.
129
-
130
- > Usage:
131
- Fill out the developer information and click "Predict Salary" to derive insights on anticipated salary and model performance.
132
-
133
- > Disclaimer:
134
- This application serves educational purposes. Predictions are not guaranteed to be accurate.
135
- ''')
 
1
  import os
2
  import joblib
3
  import pandas as pd
 
 
4
  from sklearn.metrics import r2_score
5
  from typing import List, Dict, Any
6
+ import gradio as gr
7
 
8
  # Constants for directories and file names
9
  MODEL_DIR = 'models'
10
  DATA_DIR = 'datasets'
11
+ DATA_FILE = 'cleaned_survey_results_public_v2.csv'
12
  MODEL_NAMES = [
13
  'CatBoost Regressor',
14
  'XGBoost Regressor',
 
23
  try:
24
  models[name] = joblib.load(path)
25
  except Exception as e:
26
+ print(f"Error loading model {name}: {str(e)}") # Use print for logging in Gradio
27
  return models
28
 
29
  # Load models
 
36
  # Prepare features and target
37
  X = df.drop(columns=['Salary'])
38
  y = df['Salary']
 
39
 
40
  # Pre-defined input choices
41
  input_choices = {
 
51
  default_years = 3.0 # Default years of experience
52
  max_years = float(df.YearsOfExperience.max() * 1.5)
53
 
54
# Column layout of the results table; also used to build a well-formed empty
# table when no model produces a prediction.
_RESULT_COLUMNS = ['Model', 'Predicted Salary', 'R2 Score (%)']

# R2 scores keyed by model name. The score depends only on the model and the
# module-level X / y, so it is computed once per model instead of re-running
# model.predict(X) on the whole dataset for every prediction request.
_r2_score_cache = {}


def _r2_percent(name, model):
    """Return the model's R2 score on (X, y) as a percentage, computing it at most once."""
    if name not in _r2_score_cache:
        _r2_score_cache[name] = r2_score(y, model.predict(X)) * 100
    return _r2_score_cache[name]


def load_and_predict(mainbranch, country, educationlevel, remotework, comptotal, yearsofexperience):
    """Predict salary with every loaded model and report each model's R2 score.

    The six arguments are placed, in order, into a single-row DataFrame with the
    columns 'MainBranch', 'Country', 'EducationLevel', 'RemoteWork', 'CompTotal'
    and 'YearsOfExperience', which is passed to each model's ``predict``.

    Returns:
        A DataFrame with the columns 'Model', 'Predicted Salary' and
        'R2 Score (%)', sorted by R2 score in descending order with a fresh
        index. A model whose prediction or scoring raises is reported via
        ``print`` and left out of the table. If no model produces a row, an
        empty DataFrame with the same three columns is returned (previously
        this case raised ``KeyError`` while sorting a column-less frame).
    """
    input_data = pd.DataFrame(
        [[mainbranch, country, educationlevel, remotework, comptotal, yearsofexperience]],
        columns=['MainBranch', 'Country', 'EducationLevel', 'RemoteWork', 'CompTotal', 'YearsOfExperience']
    )

    results = []

    for name, model in models.items():
        try:
            salary_pred = model.predict(input_data)[0]
            results.append({
                'Model': name,
                'Predicted Salary': salary_pred,
                'R2 Score (%)': _r2_percent(name, model),
            })
        except Exception as e:
            # Best-effort: one failing model must not prevent the others from reporting.
            print(f"Error during prediction with model {name}: {str(e)}")  # Logging

    # Passing the columns explicitly keeps the sort key present even when
    # `results` is empty.
    results_df = pd.DataFrame(results, columns=_RESULT_COLUMNS)
    return results_df.sort_values(by='R2 Score (%)', ascending=False).reset_index(drop=True)
75
 
76
# Gradio interface
# Input widgets are listed in the same order as load_and_predict's parameters.
inputs = [
    gr.Dropdown(choices=input_choices['MainBranch'], label="Main Branch"),
    gr.Dropdown(choices=input_choices['Country'], label="Country"),
    gr.Dropdown(choices=input_choices['EducationLevel'], label="Education Level"),
    gr.Dropdown(choices=input_choices['RemoteWork'], label="Remote Work"),
    gr.Number(minimum=0.0, maximum=max_comp, value=default_comp, step=0.5, label="CompTotal"),
    # Upper bound comes from max_years (derived from the dataset) rather than a
    # hard-coded 50, matching how CompTotal is bounded by max_comp.
    gr.Number(minimum=0.0, maximum=max_years, value=default_years, step=0.5, label="Years of Experience"),
]

# Table that displays the DataFrame returned by load_and_predict.
output = gr.Dataframe(label="Prediction Results")

# Build the app around load_and_predict and start serving it.
gr.Interface(
    fn=load_and_predict,
    inputs=inputs,
    outputs=output,
    title="Developer Salary Prediction App",
    description="This application predicts developer salaries using multiple machine learning models. Provide your details to get salary predictions.",
).launch()