Spaces:

anirudhabokil
/

healthylife_charge_predictor

Sleeping

App Files Files Community

anirudhabokil committed on Nov 9, 2024

Commit

0650863

verified ·

1 Parent(s): d9db921

Adding train.py and calling it from app.py

Browse files

train.py will create the model
app.py will call train to create model

Files changed (2) hide show

app.py +5 -4
train.py +40 -0

app.py CHANGED Viewed

@@ -13,6 +13,8 @@ from pathlib import Path
 # Run the training script placed in the same directory as app.py
 # The training script will train and persist a linear regression
 # model with the filename 'model.joblib'
@@ -78,12 +80,11 @@ def predict_insurance_charge(age, bmi, children, sex, smoker, region):
                   'sex': sex,
                   'smoker': smoker,
                   'region': region,
-                  'prediction': 123
               }
           ))
           f.write("\n")
-  #round prediction[0] to 2 decimal
-  return round(prediction[0], 2)
 # Create the gradio interface, make title "HealthyLife Insurance Charge Prediction"
@@ -91,7 +92,7 @@ demo = gr.Interface(fn=predict_insurance_charge,
                     inputs=[age, bmi, children, sex, smoker, region],
                     outputs=model_output,
                     title="HealthyLife Insurance Charge Prediction",
-                    description="This API allows you uo predict insurance charge",
                     flagging_mode="auto",
                     concurrency_limit=8)

 # Run the training script placed in the same directory as app.py
+os.system("python train.py")
 # The training script will train and persist a linear regression
 # model with the filename 'model.joblib'
                   'sex': sex,
                   'smoker': smoker,
                   'region': region,
+                  'prediction': prediction[0]
               }
           ))
           f.write("\n")
+  return prediction[0]
 # Create the gradio interface, make title "HealthyLife Insurance Charge Prediction"
                     inputs=[age, bmi, children, sex, smoker, region],
                     outputs=model_output,
                     title="HealthyLife Insurance Charge Prediction",
+                    description="This API allows you to predict insurance charge",
                     flagging_mode="auto",
                     concurrency_limit=8)

train.py ADDED Viewed

	@@ -0,0 +1,40 @@

+import joblib
+from sklearn.datasets import fetch_openml
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+from sklearn.compose import make_column_transformer
+from sklearn.pipeline import make_pipeline
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import root_mean_squared_error, r2_score, mean_squared_error
+import numpy as np
+import pandas as pd
+# Training script: fits a preprocessing + linear regression pipeline on the
+# insurance dataset and persists it to 'model.joblib' in the working directory.
+# Load the raw dataset from the hf:// dataset path.
+df_original = pd.read_csv("hf://datasets/anirudhabokil/insurance_data/insurance_data.csv")
+target = 'charges'
+# Remove the index column and assign the result to a new dataset df
+df = df_original.drop(columns=['index'])
+numerical_features = ['age', 'bmi', 'children']
+categorical_features = ['sex', 'smoker', 'region']
+X = df[numerical_features + categorical_features]
+y = df[target]
+print('Splitting data')
+# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
+Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)
+# Standardize the numerical columns and one-hot encode the categorical columns.
+preprocessor = make_column_transformer(
+    (StandardScaler(), numerical_features),
+    (OneHotEncoder(), categorical_features)
+)
+# The estimator is a linear regression, so it is named accordingly.
+model_linear_regression = LinearRegression(n_jobs=-1)
+print('Estimating model pipelline')
+model_pipeline = make_pipeline(preprocessor, model_linear_regression)
+model_pipeline.fit(Xtrain, ytrain)
+# Evaluate on the held-out split only; these predictions are not persisted.
+prediction = model_pipeline.predict(Xtest)
+print('Logging metrics')
+print(f"R-squared: {r2_score(ytest, prediction)}")
+print(f"RMSE: {root_mean_squared_error(ytest, prediction)}")
+print("Serializing model")
+# Keep the output path in one place and actually use it when dumping.
+saved_model_path = 'model.joblib'
+joblib.dump(model_pipeline, saved_model_path)