"""Gradio app that classifies a coronavirus sequence with a pickled model.

The input sequence is cut into overlapping k-mer "words", vectorized with a
pickled count vectorizer, and passed to a pickled pre-trained classifier.
"""

import pickle
import sys

import gradio as gr
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# NOTE(review): numpy, pandas, sys and the two sklearn classes are not
# referenced directly below; the imports are retained unchanged.


def greet(name):
    """Return a greeting for *name* (not used by the interface below)."""
    return "Hello " + name + "!!"


def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file."""
    # NOTE(review): pickle can execute arbitrary code while loading; only
    # ship .pkl files that come from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# load the CountVectorizer from disk
cv = _load_pickle('countVectTrain.pkl')

# load the model from disk
filename = 'corona_pred.pkl'
model = _load_pickle(filename)

# default k-mer word size = 6 (hexamer words)
kmer_size = 6


def getKmers(sequence, size=kmer_size):
    """Split *sequence* into overlapping, lower-cased k-mer words.

    Args:
        sequence: The sequence string to split.
        size: Length of each k-mer word; defaults to ``kmer_size``.

    Returns:
        A list with one ``size``-character word per start position. The list
        is empty when *sequence* is shorter than *size*.
    """
    return [sequence[x:x + size].lower() for x in range(len(sequence) - size + 1)]


def classify_sequence(sequence):
    """Predict the class of *sequence* and the probability of that class.

    Args:
        sequence: Sequence text entered by the user. Whitespace (spaces,
            tabs, line breaks from a multi-line paste) is ignored.

    Returns:
        A dict with ``'predicted_class'`` (the model's label) and
        ``'probability'`` (the highest class probability, as a percentage).

    Raises:
        ValueError: If the sequence is too short to produce a single k-mer,
            in which case a prediction would not be based on any input.
    """
    # Drop all whitespace so wrapped / multi-line input does not produce
    # k-mers that contain blanks and can never match the vocabulary.
    cleaned = "".join((sequence or "").split())

    # convert the input sequence into k-mer words
    words = getKmers(cleaned)
    if not words:
        raise ValueError(
            f"Sequence must contain at least {kmer_size} characters "
            f"(got {len(cleaned)})."
        )

    # join the k-mer words into one space-separated document
    text = ' '.join(words)

    # vectorize the text using Count Vectorization
    X = cv.transform([text])

    # make predictions using the pre-trained model
    pred_label = model.predict(X)[0]
    pred_prob_percentage = model.predict_proba(X).max() * 100

    # The result is rendered as text, so convert NumPy scalars to plain
    # Python values to keep the output free of ``np.float64(...)`` wrappers.
    if hasattr(pred_label, "item"):
        pred_label = pred_label.item()

    # return the predicted class and probability
    return {
        'predicted_class': pred_label,
        'probability': float(pred_prob_percentage),
    }


# define the Gradio interface
iface = gr.Interface(
    fn=classify_sequence,
    inputs="text",
    outputs=["text"],
    title="Coronavirus Sequence Classifier",
    description="Enter a coronavirus sequence to predict its class and probability.",
)

# Launched at module level, as in the original script.
iface.launch()