import json
import subprocess
import uuid
from pathlib import Path

import gradio as gr
import joblib
import pandas as pd
from huggingface_hub import CommitScheduler, HfApi

# Optional: a Hub client for ad-hoc API calls, e.g. api.list_models();
# it is not required for the training/prediction flow below.
api = HfApi()

# Run the training script placed in the same directory as app.py.
# It trains and persists a linear regression pipeline to 'model.joblib';
# check=True stops the app from starting if training fails.
subprocess.run(['python', 'train.py', 'insurance.csv', 'model.joblib'], check=True)
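
# For reference, a minimal sketch of what train.py is assumed to do (the real
# script is not part of this file; column names and the 'charges' target follow
# the common insurance.csv schema):
#
#     import sys
#     import joblib
#     import pandas as pd
#     from sklearn.compose import make_column_transformer
#     from sklearn.linear_model import LinearRegression
#     from sklearn.model_selection import train_test_split
#     from sklearn.pipeline import make_pipeline
#     from sklearn.preprocessing import OneHotEncoder, StandardScaler
#
#     data_path, model_path = sys.argv[1], sys.argv[2]
#     df = pd.read_csv(data_path)
#     X, y = df.drop(columns=['charges']), df['charges']
#     preprocess = make_column_transformer(
#         (StandardScaler(), ['age', 'bmi', 'children']),
#         (OneHotEncoder(handle_unknown='ignore'), ['sex', 'smoker', 'region']),
#     )
#     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
#     model = make_pipeline(preprocess, LinearRegression()).fit(X_train, y_train)
#     joblib.dump(model, model_path)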

# Load the freshly trained model from disk
model = joblib.load("model.joblib")

# Define a function for making predictions and logging them
def predict(age, bmi, children, sex, smoker, region):
    data = {
        'age': age, 
        'bmi': bmi, 
        'children': children, 
        'sex': sex, 
        'smoker': smoker, 
        'region': region}

    df = pd.DataFrame(data, index=[0])

    # model is a full sklearn Pipeline, so predict() applies the same
    # preprocessing (scaling and one-hot encoding) used during training.
    prediction = float(model.predict(df)[0])

    # Prepare the log entry
    log_entry = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region,
        'prediction': prediction
    }

    # Write the prediction as its own JSON file; hold the scheduler's lock so a
    # background commit never uploads a half-written file. (scheduler and
    # log_folder are defined below; they exist by the time Gradio calls predict.)
    log_file = log_folder / f"data_{uuid.uuid4()}.json"
    log_file.parent.mkdir(parents=True, exist_ok=True)
    with scheduler.lock:
        with log_file.open("w") as f:
            json.dump(log_entry, f)

    return prediction
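
# Each call leaves one JSON file under logs/ for the scheduler to commit.
# An illustrative (made-up) entry:
# {"age": 45, "bmi": 26.5, "children": 2, "sex": "female", "smoker": "no",
#  "region": "northeast", "prediction": 9876.54}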

# Prepare the logging functionality: the scheduler pushes the contents of
# log_folder to a dataset repo on the Hub from a background thread.
log_folder = Path("logs/")

scheduler = CommitScheduler(
    repo_id="your-username/insurance-charge-mlops-logs",  # replace 'your-username' with your Hub username
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,  # commit pending logs every 2 minutes; adjust as needed
)
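
# CommitScheduler authenticates with the ambient Hub token (e.g. the HF_TOKEN
# secret in a Space, or a local `huggingface-cli login`); the token needs write
# access to the dataset repo above, which is created if it does not exist.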

# Set up UI components for input and output
age_input = gr.Slider(0, 100, label='Age')
bmi_input = gr.Slider(15, 50, label='BMI')
children_input = gr.Slider(0, 5, step=1, label='Children')
sex_input = gr.Radio(['female', 'male'], label='Sex')
# Lowercase region values are assumed to match the category spelling in the
# insurance.csv dataset that train.py fits the one-hot encoder on.
region_input = gr.Radio(['northeast', 'southeast', 'northwest', 'southwest'], label='Region')
smoker_input = gr.Radio(['yes', 'no'], label='Smoker')

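# Note: the order of `inputs` below must match the parameter order of
# predict(age, bmi, children, sex, smoker, region).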
# Create a gradio interface
interface = gr.Interface(
    fn=predict,
    inputs=[age_input, bmi_input, children_input, sex_input, smoker_input, region_input],
    outputs="text",
    title="kgauvin603/HealthyLife Insurance Charge Prediction"
)

# The CommitScheduler already runs in a background thread; launching the
# Gradio interface starts serving predictions.
interface.launch(share=True, debug=True)