# app.py — Hugging Face Space by kgauvin603 (revision fb796b2, 3.16 kB)
import json
import os
import subprocess
import sys
import uuid
from pathlib import Path
from threading import Lock

import gradio as gr
import joblib
import pandas as pd
from huggingface_hub import CommitScheduler, HfApi
from sklearn.compose import make_column_transformer
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
# Train (or re-train) the model before serving any predictions.
# train.py (same directory as app.py) reads 'insurance.csv' and persists a
# fitted pipeline to 'model.joblib'. Two fixes vs. the original:
#   - sys.executable runs train.py with the SAME interpreter as this app,
#     not whatever 'python' happens to resolve to on PATH;
#   - check=True makes a training failure abort startup immediately instead
#     of silently falling through and loading a stale/missing model file.
subprocess.run(
    [sys.executable, "train.py", "insurance.csv", "model.joblib"],
    check=True,
)

# Load the freshly trained model pipeline from disk.
model = joblib.load("model.joblib")
# Prediction entry point wired into the Gradio interface below.
def predict(age, bmi, children, sex, smoker, region):
    """Predict the insurance charge for one person and log the request.

    Parameters mirror the training features: numeric ``age``, ``bmi``,
    ``children`` and categorical ``sex``, ``smoker``, ``region``.
    Returns the predicted charge as a plain float.

    Side effect: writes one JSON file per call under ``logs/`` so the
    module-level CommitScheduler can sync it to the dataset repo.
    """
    # Single-row frame with the same column names used during training.
    df = pd.DataFrame(
        {
            'age': age,
            'bmi': bmi,
            'children': children,
            'sex': sex,
            'smoker': smoker,
            'region': region,
        },
        index=[0],
    )
    # The fitted pipeline applies its own preprocessing inside predict(),
    # so pass the raw frame directly. (The original also called
    # named_steps['columntransformer'].transform(df) and discarded the
    # result — dead debug code, removed.)
    prediction = float(model.predict(df)[0])

    # Record the inputs and the prediction as one uniquely-named JSON file.
    log_entry = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region,
        'prediction': prediction,
    }
    log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
    log_file.parent.mkdir(parents=True, exist_ok=True)
    # Hold the scheduler's lock while writing so the background commit
    # thread never uploads a half-written file (pattern recommended by the
    # huggingface_hub CommitScheduler documentation).
    with scheduler.lock:
        with log_file.open("w") as f:
            json.dump(log_entry, f)
    return prediction
# Prepare the logging functionality: a CommitScheduler periodically pushes
# everything under log_folder to a Hugging Face dataset repo in the
# background, so prediction logs written by predict() get persisted.
log_folder = Path("logs/")
scheduler = CommitScheduler(
    # NOTE(review): repo_id has no namespace prefix; per the original
    # author's note it should likely be "<username>/insurance-charge-mlops-logs"
    # — confirm before deploying.
    repo_id="insurance-charge-mlops-logs",
    repo_type="dataset",  # logs live in a dataset repo, not a model repo
    folder_path=log_folder,  # local directory that predict() writes into
    path_in_repo="data",  # destination folder inside the dataset repo
    every=2,  # commit interval (minutes) — adjust the cadence as needed
)
# Set up UI components for input; order of construction here is cosmetic —
# what matters is the order they are listed in gr.Interface(inputs=...).
age_input = gr.Slider(0, 100, label='Age')
bmi_input = gr.Slider(15, 50, label='BMI')
children_input = gr.Slider(0, 5, step=1, label='Children')  # integer steps only
sex_input = gr.Radio(['female', 'male'], label='Sex')
# NOTE(review): these region choices are capitalized; if insurance.csv stores
# regions in lowercase ('northeast', ...), the one-hot encoder will see
# unknown categories at predict time — confirm against the training data.
region_input = gr.Radio(['Northeast', 'Southeast', 'Northwest', 'Southwest'], label='Region')
smoker_input = gr.Radio(['yes', 'no'], label='Smoker')
# Create a Gradio interface. The inputs list must line up positionally with
# predict()'s parameters: (age, bmi, children, sex, smoker, region).
interface = gr.Interface(
    fn=predict,
    inputs=[age_input, bmi_input, children_input, sex_input, smoker_input, region_input],
    outputs="text",  # the float prediction is rendered as text
    title="kgauvin603/HealthyLife Insurance Charge Prediction"
)
# Start the Gradio server; this call blocks until the app is stopped.
# share=True creates a public tunnel URL; debug=True streams server logs.
interface.launch(share=True, debug=True)