wdp / app.py

Create app.py

75d0fc8 over 1 year ago

4.43 kB

import gradio as gr
import numpy as np
import pandas as pd
from joblib import load

	# Load your saved models
	rf = load('best_random_forest_model.joblib')
	dt = load('best_decision_tree_model.joblib')
	mlp = load('best_MLP_classifier_model.joblib')
	knn = load('best_knn_model.joblib')

	# Class names
	class_names = ["Low Therapeutic Dose of Warfarin Required", "High Therapeutic Dose of Warfarin Required"]

	# Load training data for expected feature names
	training_data = pd.read_csv('/content/dataset_train.csv')

	# Drop the 'Unnamed: 0' column if it exists
	if 'Unnamed: 0' in training_data.columns:
	training_data = training_data.drop(columns=['Unnamed: 0'])

	expected_feature_names = training_data.columns.tolist()

	# Define the prediction function
	def predict_warfarin_dose(gender, race, age, height, weight, diabetes, simvastatin, amiodarone, genotype, inr, algorithm):
	# Decode the encoded values
	gender = "Male" if gender == 1 else "Female"
	race = race_dict_inverse[race]
	age = age_dict_inverse[age]
	genotype = genotype_dict_inverse[genotype]

	# Convert input data to DataFrame for one-hot encoding
	input_data = pd.DataFrame([[gender, race, age, height, weight, diabetes, simvastatin, amiodarone, genotype, inr]],
	columns=['gender', 'race', 'age', 'height', 'weight', 'diabetes', 'simvastatin',
	'amiodarone', 'genotype', 'inr'])

	# One-hot encode categorical features
	input_data_encoded = pd.get_dummies(input_data, columns=['gender', 'race', 'diabetes', 'simvastatin', 'amiodarone', 'genotype'])

	# Reindex the DataFrame to match expected feature names
	input_data_encoded = input_data_encoded.reindex(columns=expected_feature_names, fill_value=0)

	# Predict using the selected algorithm
	if algorithm == 'Random Forest':
	model = rf
	elif algorithm == 'Decision Tree':
	model = dt
	elif algorithm == 'MLP':
	model = mlp
	elif algorithm == 'KNN':
	model = knn
	else:
	raise ValueError("Invalid algorithm selected.")

	y_prob = model.predict_proba(input_data_encoded)
	class_idx = np.argmax(y_prob)

	preds_dict = {class_names[i]: float(y_prob[0, i]) for i in range(len(class_names))}
	name = class_names[class_idx]

	return name, preds_dict

	race_dict = {
	"African-American":0,
	"Asian":1,
	"Black":2,
	"Black African":3,"Black Caribbean":4,"Black or African American":5,"Black other":6 ,
	"Caucasian":7,"Chinese":8,"Han Chinese":9,"Hispanic":10,"Indian":11,"Intermediate":12,
	"Japanese":13,"Korean":14, "Malay":15, "Other":16, "Other (Black British)":17, "Other (Hungarian)":18, "Other Mixed Race":19, "White":20}


	age_dict = {
	"10-19":0,
	"20-29":1,
	"30-39":2,
	"40-49":3,"50-59":4,"60-69":5,"70-79":6,
	"80-89":7,"90+":8}

	genotype_dict = {"A/A":0, "A/G":1, "G/G":2}
	# Invert dictionaries for decoding
	genotype_dict_inverse = {v: k for k, v in genotype_dict.items()}
	race_dict_inverse = {v: k for k, v in race_dict.items()}
	age_dict_inverse = {v: k for k, v in age_dict.items()}

	# Create Gradio interface
	gender_choices = [("Male", 1), ("Female", 0)]
	gender_module = gr.Dropdown(choices=gender_choices, label="Gender")

	# Assuming race_choices, age_choices, genotype_choices are already defined
	race_module = gr.Dropdown(choices=list(race_dict.items()), label="Race")
	age_module = gr.Dropdown(choices=list(age_dict.items()), label="Age Group")
	genotype_module = gr.Dropdown(choices=list(genotype_dict.items()), label="Genotype")

	height_module = gr.Number(label="Height")
	weight_module = gr.Number(label="Weight")
	diabetes_module = gr.Number(label="Diabetes")
	simvastatin_module = gr.Radio(choices=[0, 1], label="Simvastatin")
	amiodarone_module = gr.Radio(choices=[0, 1], label="Amiodarone")
	inr_module = gr.Number(label="INR Reported")
	algorithm_module = gr.Dropdown(choices=["Random Forest", "Decision Tree", "MLP", "KNN"], label="Algorithm")
	output_module1 = gr.Textbox(label="Predicted Class")
	output_module2 = gr.Label(label="Predicted Probability")

	iface = gr.Interface(fn=predict_warfarin_dose,
	inputs=[gender_module, race_module, age_module, height_module, weight_module, diabetes_module,
	simvastatin_module, amiodarone_module, genotype_module, inr_module, algorithm_module],
	outputs=[output_module1, output_module2])

	iface.launch(debug=True)