from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import os
import gradio as gr
import joblib
import subprocess
import pandas as pd
import json
from pathlib import Path
from threading import Lock
from huggingface_hub import CommitScheduler
import uuid
from huggingface_hub import HfApi


# Run the training script placed in the same directory as app.py.
# The training script is expected to train a linear regression model
# and persist it under the filename 'model.joblib'.
# check=True raises CalledProcessError when the script exits with a
# non-zero status, so a failed training run stops the app here instead
# of falling through to load a missing or stale model file.
subprocess.run(
    ['python', 'train.py', 'insurance.csv', 'model.joblib'],
    check=True,
)

# Load the freshly trained model from disk
model = joblib.load("model.joblib")

# Define a function for making predictions
def predict(age, bmi, children, sex, smoker, region):
    """Predict the insurance charge for one applicant and log the request.

    The six inputs are assembled into a single-row DataFrame and passed to
    the module-level ``model``. The inputs together with the prediction are
    then written as one JSON file into the folder watched by ``scheduler``.

    Args:
        age: Applicant age (UI slider value).
        bmi: Body-mass index (UI slider value).
        children: Number of children (UI slider value).
        sex: Selected value of the 'Sex' radio group.
        smoker: Selected value of the 'Smoker' radio group.
        region: Selected value of the 'Region' radio group.

    Returns:
        The model's prediction converted to a built-in ``float``.
    """
    features = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region,
    }
    # Single-row frame whose column names match the feature keys
    df = pd.DataFrame(features, index=[0])

    # Convert to a standard Python float so the value is JSON-serialisable
    prediction = float(model.predict(df)[0])

    # The log entry is the inputs followed by the prediction
    log_entry = {**features, 'prediction': prediction}

    # One uniquely named file per request avoids clashes between requests
    log_file = log_folder / f"data_{uuid.uuid4()}.json"

    # Hold the scheduler's lock while writing so a scheduled commit never
    # picks up a partially written log file.
    with scheduler.lock:
        log_file.parent.mkdir(parents=True, exist_ok=True)
        with log_file.open("w") as f:
            json.dump(log_entry, f)

    return prediction

# Local folder that receives one JSON file per logged prediction
log_folder = Path("logs/")

# Background scheduler that commits the log folder to a Hub dataset repo.
# NOTE(review): repo_id has no "<username>/" prefix - confirm it resolves
# to the intended account or organisation.
scheduler = CommitScheduler(
    folder_path=log_folder,
    path_in_repo="data",
    repo_id="insurance-charge-mlops-logs",
    repo_type="dataset",
    every=2,  # minutes between scheduled commits; adjust as needed
)

# Input widgets, one per model feature.
# Numeric features use sliders; categorical features use radio groups.
age_input = gr.Slider(minimum=0, maximum=100, label='Age')
bmi_input = gr.Slider(minimum=15, maximum=50, label='BMI')
children_input = gr.Slider(minimum=0, maximum=5, step=1, label='Children')
sex_input = gr.Radio(choices=['female', 'male'], label='Sex')
# NOTE(review): region labels are capitalised here - confirm they match the
# category spelling used in the training data.
region_input = gr.Radio(
    choices=['Northeast', 'Southeast', 'Northwest', 'Southwest'],
    label='Region',
)
smoker_input = gr.Radio(choices=['yes', 'no'], label='Smoker')

# Wire the prediction function and the input widgets into a Gradio interface.
# The order of `inputs` must match the parameter order of predict().
interface = gr.Interface(
    title="HealthyLife Insurance Charge Prediction",
    fn=predict,
    inputs=[
        age_input,
        bmi_input,
        children_input,
        sex_input,
        smoker_input,
        region_input,
    ],
    outputs="text",
)

# Launch the Gradio app; share=True requests a public share link
interface.launch(share=True)