anirudhabokil committed on
Commit
0650863
·
verified ·
1 Parent(s): d9db921

Adding train.py and calling it from app.py

Browse files

train.py trains the model and saves it as model.joblib
app.py runs train.py at startup to create the model

Files changed (2) hide show
  1. app.py +5 -4
  2. train.py +40 -0
app.py CHANGED
@@ -13,6 +13,8 @@ from pathlib import Path
13
 
14
 
15
  # Run the training script placed in the same directory as app.py
 
 
16
  # The training script will train and persist a linear regression
17
  # model with the filename 'model.joblib'
18
 
@@ -78,12 +80,11 @@ def predict_insurance_charge(age, bmi, children, sex, smoker, region):
78
  'sex': sex,
79
  'smoker': smoker,
80
  'region': region,
81
- 'prediction': 123
82
  }
83
  ))
84
  f.write("\n")
85
- #round prediction[0] to 2 decimal
86
- return round(prediction[0], 2)
87
 
88
 
89
  # Create the gradio interface, make title "HealthyLife Insurance Charge Prediction"
@@ -91,7 +92,7 @@ demo = gr.Interface(fn=predict_insurance_charge,
91
  inputs=[age, bmi, children, sex, smoker, region],
92
  outputs=model_output,
93
  title="HealthyLife Insurance Charge Prediction",
94
- description="This API allows you uo predict insurance charge",
95
  flagging_mode="auto",
96
  concurrency_limit=8)
97
 
 
13
 
14
 
15
  # Run the training script placed in the same directory as app.py
16
+ os.system("python train.py")
17
+
18
  # The training script will train and persist a linear regression
19
  # model with the filename 'model.joblib'
20
 
 
80
  'sex': sex,
81
  'smoker': smoker,
82
  'region': region,
83
+ 'prediction': prediction[0]
84
  }
85
  ))
86
  f.write("\n")
87
+ return prediction[0]
 
88
 
89
 
90
  # Create the gradio interface, make title "HealthyLife Insurance Charge Prediction"
 
92
  inputs=[age, bmi, children, sex, smoker, region],
93
  outputs=model_output,
94
  title="HealthyLife Insurance Charge Prediction",
95
+ description="This API allows you to predict insurance charge",
96
  flagging_mode="auto",
97
  concurrency_limit=8)
98
 
train.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import joblib
3
+ from sklearn.datasets import fetch_openml
4
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
5
+ from sklearn.compose import make_column_transformer
6
+ from sklearn.pipeline import make_pipeline
7
+ from sklearn.model_selection import train_test_split
8
+ from sklearn.linear_model import LinearRegression
9
+ from sklearn.metrics import root_mean_squared_error, r2_score, mean_squared_error
10
+ import numpy as np
11
+ import pandas as pd
12
+
13
+ df_original = pd.read_csv("hf://datasets/anirudhabokil/insurance_data/insurance_data.csv")
14
+
15
+ target = 'charges'
16
+ # remove index column and assign to new dataset df
17
+ df = df_original.drop(columns=['index'])
18
+ numerical_features = ['age', 'bmi', 'children']
19
+ categorical_features = ['sex', 'smoker', 'region']
20
+
21
+ X = df[numerical_features + categorical_features]
22
+ y = df[target]
23
+ print('Splitting data')
24
+ Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)
25
+ preprocessor = make_column_transformer(
26
+ (StandardScaler(), numerical_features),
27
+ (OneHotEncoder(), categorical_features)
28
+ )
29
+ model_logistic_regression = LinearRegression(n_jobs=-1)
30
+ print('Estimating model pipelline')
31
+ model_pipeline = make_pipeline(preprocessor, model_logistic_regression)
32
+ model_pipeline.fit(Xtrain, ytrain)
33
+ prediction = model_pipeline.predict(Xtest)
34
+
35
+ print('Logging metrics')
36
+ print(f"R-squared: {r2_score(ytest, prediction)}")
37
+ print(f"RMSE: {root_mean_squared_error(ytest, prediction)}")
38
+ print("Serializing model")
39
+ saved_mode_path = 'model.joblib'
40
+ joblib.dump(model_pipeline, 'model.joblib')