# Spaces status: Runtime error (page header captured together with the source; not part of the program)
from sklearn.datasets import fetch_openml | |
from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
from sklearn.compose import make_column_transformer | |
from sklearn.pipeline import make_pipeline | |
from sklearn.model_selection import train_test_split | |
from sklearn.linear_model import LinearRegression | |
from sklearn.metrics import mean_squared_error, r2_score | |
import os | |
import gradio as gr | |
import joblib | |
import subprocess | |
import pandas as pd | |
import json | |
from pathlib import Path | |
from threading import Lock | |
from huggingface_hub import CommitScheduler | |
import uuid | |
from huggingface_hub import HfApi | |
# NOTE: `HfApi` is imported above, but no `api` object is created in this script.
# To make Hub API calls, instantiate one first, for example:
# api = HfApi()
# models = api.list_models()
# print(models)
# Run the training script placed in the same directory as app.py.
# The training script is expected to train a linear regression model and
# persist it under the filename 'model.joblib'.
# check=True makes a failed training run raise CalledProcessError right here,
# instead of silently continuing and loading a missing or stale model file.
subprocess.run(
    ['python', 'train.py', 'insurance.csv', 'model.joblib'],
    check=True,
)

# Load the freshly trained model from disk
model = joblib.load("model.joblib")
# Define a function for making predictions and logging them
def predict(age, bmi, children, sex, smoker, region):
    """Predict a charge for one applicant and log the request as JSON.

    The six inputs are assembled into a one-row DataFrame and passed to the
    module-level ``model``. The inputs together with the prediction are then
    written to a uniquely named JSON file inside ``log_folder``.

    Args:
        age: Value for the 'age' column.
        bmi: Value for the 'bmi' column.
        children: Value for the 'children' column.
        sex: Value for the 'sex' column.
        smoker: Value for the 'smoker' column.
        region: Value for the 'region' column.

    Returns:
        The model's prediction for the single input row, as a ``float``.
    """
    features = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region,
    }
    frame = pd.DataFrame(features, index=[0])

    # The raw frame goes straight to the model, which is a pipeline that does
    # its own preprocessing; a separate transform call was redundant.
    prediction = float(model.predict(frame)[0])

    # Prepare the log entry: the inputs plus the prediction
    log_entry = {**features, 'prediction': prediction}

    # Log the prediction. A uuid-based name gives every request its own file.
    log_file = log_folder / f"data_{uuid.uuid4()}.json"
    # Hold the scheduler's lock while writing so a scheduled commit cannot
    # pick up a partially written file.
    with scheduler.lock:
        log_file.parent.mkdir(parents=True, exist_ok=True)
        with log_file.open("w", encoding="utf-8") as f:
            json.dump(log_entry, f)

    return prediction
# Prepare the logging functionality.
# Files placed under `log_folder` are committed by the scheduler into the
# "data" directory of the dataset repository named below.
# NOTE(review): the repo id carries no "username/" prefix; confirm it resolves
# to the intended namespace.
log_folder = Path("logs/")
scheduler = CommitScheduler(
    folder_path=log_folder,
    path_in_repo="data",
    repo_id="insurance-charge-mlops-logs",
    repo_type="dataset",
    every=2,  # interval between scheduled commits; adjust as needed
)
# Input widgets, one per model feature
age_input = gr.Slider(minimum=0, maximum=100, label='Age')
bmi_input = gr.Slider(minimum=15, maximum=50, label='BMI')
children_input = gr.Slider(minimum=0, maximum=5, step=1, label='Children')
sex_input = gr.Radio(choices=['female', 'male'], label='Sex')
# NOTE(review): region labels are capitalised here; confirm they match the
# category values the model was trained on.
region_input = gr.Radio(
    choices=['Northeast', 'Southeast', 'Northwest', 'Southwest'],
    label='Region',
)
smoker_input = gr.Radio(choices=['yes', 'no'], label='Smoker')

# The order must follow predict's signature:
# (age, bmi, children, sex, smoker, region)
input_components = [
    age_input,
    bmi_input,
    children_input,
    sex_input,
    smoker_input,
    region_input,
]

# Wire the widgets to `predict`; the returned value is shown as text
interface = gr.Interface(
    fn=predict,
    inputs=input_components,
    outputs="text",
    title="kgauvin603/HealthyLife Insurance Charge Prediction",
)

# Launch the Gradio interface (no explicit scheduler start call is made here)
interface.launch(share=True, debug=True)