File size: 4,454 Bytes
6279287
1d16fd6
 
 
 
 
 
 
 
 
 
 
6178de1
1d16fd6
 
3617ceb
1d16fd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84ecedd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import gradio as gr
from joblib import load
import numpy as np
import pandas as pd

# Deserialize the four trained classifiers once, at import time.
rf, dt, mlp, knn = [
    load(model_path)
    for model_path in (
        'best_random_forest_model.joblib',
        'best_decision_tree_model.joblib',
        'best_MLP_classifier_model.joblib',
        'best_knn_model.joblib',
    )
]

# Human-readable labels, indexed by predicted class position.
class_names = [
    "High Therapeutic Dose of Warfarin Required",
    "Low Therapeutic Dose of Warfarin Required",
]

# The training CSV is read only to recover the column names used below;
# a stray 'Unnamed: 0' index column is discarded when present.
training_data = pd.read_csv('dataset_train.csv').drop(
    columns=['Unnamed: 0'], errors='ignore'
)

expected_feature_names = list(training_data.columns)

# Define the prediction function
def predict_warfarin_dose(gender, race, age, height, weight, diabetes, simvastatin, amiodarone, genotype, inr, algorithm):
    """Predict the warfarin dose class for one patient with the chosen model.

    Args:
        gender: Encoded gender; 1 means "Male", 0 means "Female".
        race: Integer code looked up in ``race_dict_inverse``.
        age: Integer code looked up in ``age_dict_inverse``.
        height: Feature value passed through unchanged.
        weight: Feature value passed through unchanged.
        diabetes: Value that is one-hot encoded.
        simvastatin: Value that is one-hot encoded.
        amiodarone: Value that is one-hot encoded.
        genotype: Integer code looked up in ``genotype_dict_inverse``.
        inr: Feature value passed through unchanged.
        algorithm: One of 'Random Forest', 'Decision Tree', 'MLP', 'KNN'.

    Returns:
        A ``(name, preds_dict)`` tuple: the label of the most probable class
        and a dict mapping every entry of ``class_names`` to its probability.

    Raises:
        ValueError: If ``gender`` is neither 0 nor 1, or ``algorithm`` is not
            one of the supported names.
        KeyError: If ``race``, ``age`` or ``genotype`` is not a known code.
    """
    # Decode the encoded values.
    # Reject anything that is not an explicit 0/1 instead of silently
    # treating it as "Female".
    if gender == 1:
        gender = "Male"
    elif gender == 0:
        gender = "Female"
    else:
        raise ValueError("Invalid gender selected.")
    race = race_dict_inverse[race]
    age = age_dict_inverse[age]
    genotype = genotype_dict_inverse[genotype]

    # Convert input data to a single-row DataFrame for one-hot encoding
    input_data = pd.DataFrame(
        [[gender, race, age, height, weight, diabetes, simvastatin, amiodarone, genotype, inr]],
        columns=['gender', 'race', 'age', 'height', 'weight', 'diabetes', 'simvastatin',
                 'amiodarone', 'genotype', 'inr'])

    # One-hot encode categorical features.
    # NOTE(review): 'age' is decoded to its label above but is not in this
    # column list, so it stays a single string column - confirm this matches
    # how the training features were built.
    input_data_encoded = pd.get_dummies(
        input_data,
        columns=['gender', 'race', 'diabetes', 'simvastatin', 'amiodarone', 'genotype'])

    # Reindex to the training column layout: columns missing from the input
    # are filled with 0 and columns unknown to the training data are dropped.
    # NOTE(review): a naming mismatch here is silent - verify the generated
    # dummy column names against expected_feature_names.
    input_data_encoded = input_data_encoded.reindex(columns=expected_feature_names, fill_value=0)

    # Pick the model for the selected algorithm
    models = {
        'Random Forest': rf,
        'Decision Tree': dt,
        'MLP': mlp,
        'KNN': knn,
    }
    if algorithm not in models:
        raise ValueError("Invalid algorithm selected.")
    model = models[algorithm]

    # predict_proba returns one row per sample; only one sample is sent, so
    # work on row 0 explicitly rather than on the flattened 2-D array.
    y_prob = model.predict_proba(input_data_encoded)
    row = y_prob[0]
    class_idx = int(np.argmax(row))

    # NOTE(review): assumes class_names is ordered like the model's
    # probability columns - TODO confirm against model.classes_.
    preds_dict = {label: float(row[i]) for i, label in enumerate(class_names)}
    name = class_names[class_idx]

    return name, preds_dict

# Label -> integer code tables; a label's code is its position in the tuple.
race_dict = {
    label: code
    for code, label in enumerate((
        "African-American",
        "Asian",
        "Black",
        "Black African",
        "Black Caribbean",
        "Black or African American",
        "Black other",
        "Caucasian",
        "Chinese",
        "Han Chinese",
        "Hispanic",
        "Indian",
        "Intermediate",
        "Japanese",
        "Korean",
        "Malay",
        "Other",
        "Other (Black British)",
        "Other (Hungarian)",
        "Other Mixed Race",
        "White",
    ))
}

age_dict = {
    label: code
    for code, label in enumerate((
        "10-19",
        "20-29",
        "30-39",
        "40-49",
        "50-59",
        "60-69",
        "70-79",
        "80-89",
        "90+",
    ))
}

genotype_dict = {
    label: code
    for code, label in enumerate(("A/A", "A/G", "G/G"))
}

# Integer code -> label tables, used to decode values coming back from the UI
genotype_dict_inverse = dict(zip(genotype_dict.values(), genotype_dict))
race_dict_inverse = dict(zip(race_dict.values(), race_dict))
age_dict_inverse = dict(zip(age_dict.values(), age_dict))

# Create Gradio interface
# Dropdown choices are (label, code) pairs; the prediction function decodes
# the codes it receives.
gender_choices = [("Male", 1), ("Female", 0)]
gender_module = gr.Dropdown(label="Gender", choices=gender_choices)

# Race, age and genotype choices come straight from the lookup dictionaries.
race_module = gr.Dropdown(label="Race", choices=[*race_dict.items()])
age_module = gr.Dropdown(label="Age Group", choices=[*age_dict.items()])
genotype_module = gr.Dropdown(label="Genotype", choices=[*genotype_dict.items()])

# Numeric and binary patient inputs
height_module = gr.Number(label="Height")
weight_module = gr.Number(label="Weight")
diabetes_module = gr.Number(label="Diabetes")
simvastatin_module = gr.Radio(label="Simvastatin", choices=[0, 1])
amiodarone_module = gr.Radio(label="Amiodarone", choices=[0, 1])
inr_module = gr.Number(label="INR Reported")

# Model selector; the names must match those accepted by predict_warfarin_dose
algorithm_module = gr.Dropdown(
    label="Algorithm",
    choices=["Random Forest", "Decision Tree", "MLP", "KNN"],
)

# Outputs: predicted class label and per-class probabilities
output_module1 = gr.Textbox(label="Predicted Class")
output_module2 = gr.Label(label="Predicted Probability")

# Input order must follow the parameter order of predict_warfarin_dose.
iface = gr.Interface(
    fn=predict_warfarin_dose,
    inputs=[
        gender_module,
        race_module,
        age_module,
        height_module,
        weight_module,
        diabetes_module,
        simvastatin_module,
        amiodarone_module,
        genotype_module,
        inr_module,
        algorithm_module,
    ],
    outputs=[output_module1, output_module2],
)

iface.launch(share=True, debug=True)