# app.py — Hugging Face Space by kgauvin603 (revision fb796b2, 3.16 kB)
import json
import os
import subprocess
import sys
import uuid
from pathlib import Path
from threading import Lock

import gradio as gr
import joblib
import pandas as pd
from huggingface_hub import CommitScheduler, HfApi
from sklearn.compose import make_column_transformer
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
# Train (or re-train) the model before serving any predictions.
# train.py (same directory as app.py) reads 'insurance.csv' and persists a
# fitted pipeline to 'model.joblib'. Two fixes vs. the original:
#   - sys.executable runs train.py with the SAME interpreter as this app,
#     not whatever 'python' happens to resolve to on PATH;
#   - check=True makes a training failure abort startup immediately instead
#     of silently falling through and loading a stale/missing model file.
subprocess.run(
    [sys.executable, "train.py", "insurance.csv", "model.joblib"],
    check=True,
)

# Load the freshly trained model pipeline from disk.
model = joblib.load("model.joblib")
# Prediction entry point wired into the Gradio interface below.
def predict(age, bmi, children, sex, smoker, region):
    """Predict the insurance charge for one person and log the request.

    Parameters mirror the training features: numeric ``age``, ``bmi``,
    ``children`` and categorical ``sex``, ``smoker``, ``region``.
    Returns the predicted charge as a plain float.

    Side effect: writes one JSON file per call under ``logs/`` so the
    module-level CommitScheduler can sync it to the dataset repo.
    """
    # Single-row frame with the same column names used during training.
    df = pd.DataFrame(
        {
            'age': age,
            'bmi': bmi,
            'children': children,
            'sex': sex,
            'smoker': smoker,
            'region': region,
        },
        index=[0],
    )
    # The fitted pipeline applies its own preprocessing inside predict(),
    # so pass the raw frame directly. (The original also called
    # named_steps['columntransformer'].transform(df) and discarded the
    # result — dead debug code, removed.)
    prediction = float(model.predict(df)[0])

    # Record the inputs and the prediction as one uniquely-named JSON file.
    log_entry = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region,
        'prediction': prediction,
    }
    log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
    log_file.parent.mkdir(parents=True, exist_ok=True)
    # Hold the scheduler's lock while writing so the background commit
    # thread never uploads a half-written file (pattern recommended by the
    # huggingface_hub CommitScheduler documentation).
    with scheduler.lock:
        with log_file.open("w") as f:
            json.dump(log_entry, f)
    return prediction
# Prepare the logging functionality: a CommitScheduler periodically pushes
# everything under log_folder to a Hugging Face dataset repo in the
# background, so prediction logs written by predict() get persisted.
log_folder = Path("logs/")
scheduler = CommitScheduler(
    # NOTE(review): repo_id has no namespace prefix; per the original
    # author's note it should likely be "<username>/insurance-charge-mlops-logs"
    # — confirm before deploying.
    repo_id="insurance-charge-mlops-logs",
    repo_type="dataset",  # logs live in a dataset repo, not a model repo
    folder_path=log_folder,  # local directory that predict() writes into
    path_in_repo="data",  # destination folder inside the dataset repo
    every=2,  # commit interval (minutes) — adjust the cadence as needed
)
# Set up UI components for input; order of construction here is cosmetic —
# what matters is the order they are listed in gr.Interface(inputs=...).
age_input = gr.Slider(0, 100, label='Age')
bmi_input = gr.Slider(15, 50, label='BMI')
children_input = gr.Slider(0, 5, step=1, label='Children')  # integer steps only
sex_input = gr.Radio(['female', 'male'], label='Sex')
# NOTE(review): these region choices are capitalized; if insurance.csv stores
# regions in lowercase ('northeast', ...), the one-hot encoder will see
# unknown categories at predict time — confirm against the training data.
region_input = gr.Radio(['Northeast', 'Southeast', 'Northwest', 'Southwest'], label='Region')
smoker_input = gr.Radio(['yes', 'no'], label='Smoker')
# Create a Gradio interface. The inputs list must line up positionally with
# predict()'s parameters: (age, bmi, children, sex, smoker, region).
interface = gr.Interface(
    fn=predict,
    inputs=[age_input, bmi_input, children_input, sex_input, smoker_input, region_input],
    outputs="text",  # the float prediction is rendered as text
    title="kgauvin603/HealthyLife Insurance Charge Prediction"
)
# Start the Gradio server; this call blocks until the app is stopped.
# share=True creates a public tunnel URL; debug=True streams server logs.
interface.launch(share=True, debug=True)