"""Gradio app that serves insurance-charge predictions and logs every request.

On start-up the script runs ``train.py`` to (re)train the model, loads the
persisted estimator from ``model.joblib``, and launches a Gradio interface.
Each prediction is written to its own JSON file under ``logs/``; a
``CommitScheduler`` periodically pushes that folder to a Hugging Face dataset
repository.
"""

import json
import os
import subprocess
import sys
import uuid
from pathlib import Path
from threading import Lock

import gradio as gr
import joblib
import pandas as pd
from huggingface_hub import CommitScheduler, HfApi
# NOTE: several of the sklearn names below are not referenced in this file;
# they are kept unchanged from the original import list.
from sklearn.compose import make_column_transformer
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Run the training script placed in the same directory as app.py.
# The training script will train and persist a linear regression
# model with the filename 'model.joblib'.
# - sys.executable guarantees the same interpreter/environment as this app.
# - check=True aborts start-up if training fails, instead of silently
#   continuing and loading a missing or stale model file.
subprocess.run(
    [sys.executable, 'train.py', 'insurance.csv', 'model.joblib'],
    check=True,
)

# Load the freshly trained model from disk
model = joblib.load("model.joblib")

# Prepare the logging functionality.
# The scheduler is created before `predict` so the function can hold
# `scheduler.lock` while writing log files.
log_folder = Path("logs/")

scheduler = CommitScheduler(
    # NOTE(review): no namespace is given here; presumably the repo is
    # resolved under the authenticated user's namespace. Use
    # "<username>/insurance-charge-mlops-logs" to make it explicit.
    repo_id="insurance-charge-mlops-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,  # Minutes between commits; adjust the scheduling as needed
)


def predict(age, bmi, children, sex, smoker, region):
    """Predict the insurance charge for one applicant and log the request.

    Args:
        age: Applicant age (value of the 'Age' slider).
        bmi: Body-mass index (value of the 'BMI' slider).
        children: Number of children (value of the 'Children' slider).
        sex: One of the 'Sex' radio choices.
        smoker: One of the 'Smoker' radio choices.
        region: One of the 'Region' radio choices.

    Returns:
        The model's prediction as a built-in ``float``.

    Side effects:
        Writes the inputs and the prediction to a uniquely named JSON file
        inside ``log_folder``.
    """
    features = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region,
    }

    # Single-row DataFrame whose column names match the feature keys.
    df = pd.DataFrame(features, index=[0])

    # Convert the prediction to a standard Python float so that it is
    # JSON-serialisable.
    prediction = float(model.predict(df)[0])

    # Prepare the log entry: the inputs followed by the prediction.
    log_entry = {**features, 'prediction': prediction}

    # One file per request, so concurrent requests never share a file.
    log_file = log_folder / f"data_{uuid.uuid4()}.json"

    # Hold the scheduler's lock while writing so that a scheduled commit
    # cannot upload a partially written file.
    with scheduler.lock:
        log_file.parent.mkdir(parents=True, exist_ok=True)
        with log_file.open("w") as f:
            json.dump(log_entry, f)

    return prediction


# Set up UI components for input and output
age_input = gr.Slider(0, 100, label='Age')
bmi_input = gr.Slider(15, 50, label='BMI')
children_input = gr.Slider(0, 5, step=1, label='Children')
sex_input = gr.Radio(['female', 'male'], label='Sex')
# NOTE(review): the public insurance.csv dataset uses lowercase region values
# ('northeast', ...). Confirm against train.py that these capitalised choices
# match the categories the model was trained on.
region_input = gr.Radio(
    ['Northeast', 'Southeast', 'Northwest', 'Southwest'],
    label='Region',
)
smoker_input = gr.Radio(['yes', 'no'], label='Smoker')

# Create a gradio interface. The order of `inputs` must match the positional
# parameters of `predict`.
interface = gr.Interface(
    fn=predict,
    inputs=[
        age_input,
        bmi_input,
        children_input,
        sex_input,
        smoker_input,
        region_input,
    ],
    outputs="text",
    title="HealthyLife Insurance Charge Prediction",
)

# Start the Gradio interface. The CommitScheduler's background job was
# already started when `scheduler` was constructed above.
interface.launch(share=True)