from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
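# Note: the scikit-learn imports above are not used directly in app.py; they
# presumably mirror train.py and keep the two files' environments aligned, since
# the pickled pipeline loaded below requires scikit-learn to be installed.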
import os
import gradio as gr
import joblib
import subprocess
import pandas as pd
import json
from pathlib import Path
from threading import Lock
from huggingface_hub import CommitScheduler
import uuid
from huggingface_hub import HfApi
# An HfApi client can be created if Hub API calls are needed, for example:
# api = HfApi()
# models = api.list_models()
# Run the training script located in the same directory as app.py.
# It trains a linear regression model and persists it as 'model.joblib'.
# check=True surfaces a training failure instead of silently loading a stale model.
subprocess.run(['python', 'train.py', 'insurance.csv', 'model.joblib'], check=True)
# Load the freshly trained model from disk
model = joblib.load("model.joblib")
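# A minimal sketch of what train.py is assumed to produce (not the actual training
# script): a scikit-learn pipeline whose column transformer scales the numeric
# columns and one-hot encodes the categorical ones before a LinearRegression step.
# Column names and encoder settings below are assumptions inferred from predict().
#
# numeric = ['age', 'bmi', 'children']
# categorical = ['sex', 'smoker', 'region']
# preprocess = make_column_transformer(
#     (StandardScaler(), numeric),
#     (OneHotEncoder(handle_unknown='ignore'), categorical),
# )
# model = make_pipeline(preprocess, LinearRegression())
# model.fit(X_train, y_train)
# joblib.dump(model, 'model.joblib')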
# Define a function for making predictions and logging them
def predict(age, bmi, children, sex, smoker, region):
    data = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region
    }
    df = pd.DataFrame(data, index=[0])

    # The pipeline applies the column transformer (scaling / one-hot encoding)
    # internally, so the raw DataFrame can be passed straight to predict().
    # transformed_df = model.named_steps['columntransformer'].transform(df)
    # print("Transformed DataFrame:", transformed_df)  # Debug transformed data
    prediction = float(model.predict(df)[0])

    # Prepare the log entry
    log_entry = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region,
        'prediction': prediction
    }

    # Log the prediction to a uniquely named JSON file; the scheduler lock
    # prevents writing while a background commit is in progress.
    log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
    log_file.parent.mkdir(parents=True, exist_ok=True)
    with scheduler.lock:
        with log_file.open("w") as f:
            json.dump(log_entry, f)

    return prediction
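# Example call (hypothetical values, for illustration only):
# predict(35, 27.5, 2, 'male', 'no', 'southeast')  # returns the estimated charge as a float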
# Prepare the logging functionality: periodically push the contents of the
# logs folder to a dataset repository on the Hub.
log_folder = Path("logs/")
scheduler = CommitScheduler(
    repo_id="insurance-charge-mlops-logs",  # Prefix with your username, e.g. 'your-username/insurance-charge-mlops-logs'
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,  # Commit every 2 minutes; adjust the interval as needed
)
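# Note: CommitScheduler starts a background thread as soon as it is instantiated,
# so no explicit start call is needed; new files written under 'logs/' are
# committed to the dataset repo on each scheduled run.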
# Set up UI components for input and output
age_input = gr.Slider(0, 100, label='Age')
bmi_input = gr.Slider(15, 50, label='BMI')
children_input = gr.Slider(0, 5, step=1, label='Children')
sex_input = gr.Radio(['female', 'male'], label='Sex')
# Region values are lowercase to match the categories in the standard insurance
# dataset; mismatched casing can make the fitted one-hot encoder reject or ignore the value.
region_input = gr.Radio(['northeast', 'southeast', 'northwest', 'southwest'], label='Region')
smoker_input = gr.Radio(['yes', 'no'], label='Smoker')
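# The order of widgets in `inputs` below must match the parameter order of
# predict(age, bmi, children, sex, smoker, region).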
# Create a Gradio interface
interface = gr.Interface(
    fn=predict,
    inputs=[age_input, bmi_input, children_input, sex_input, smoker_input, region_input],
    outputs="text",
    title="kgauvin603/HealthyLife Insurance Charge Prediction"
)
# Launch the Gradio interface; the CommitScheduler keeps committing logs in the background
interface.launch(share=True, debug=True)