Spaces:
Sleeping
Sleeping
import gradio as gr | |
import numpy as np | |
import pandas as pd | |
from sklearn.feature_extraction.text import CountVectorizer | |
from sklearn.naive_bayes import MultinomialNB | |
import pickle | |
import sys | |
def greet(name: str) -> str:
    """Return a greeting for *name*.

    Args:
        name: Text to embed in the greeting. It is concatenated with ``+``,
            so a non-``str`` argument raises ``TypeError``.

    Returns:
        The string ``"Hello "`` followed by *name* and ``"!!"``.
    """
    return "Hello " + name + "!!"
# Load the CountVectorizer from disk.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# these artifacts must come from a trusted source.
# The context managers make sure the file handles are closed after loading.
with open('countVectTrain.pkl', 'rb') as vectorizer_file:
    cv = pickle.load(vectorizer_file)

# Load the model from disk.
filename = 'corona_pred.pkl'
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)
# Default k-mer length; 6 yields hexamer "words".
kmer_size = 6


def getKmers(sequence, size=kmer_size):
    """Split *sequence* into overlapping, lower-cased k-mer words.

    A window of ``size`` characters slides over *sequence* one position at a
    time; each window becomes one word.

    Args:
        sequence: The string to split.
        size: Window length. The default is the value ``kmer_size`` had when
            this function was defined (6).

    Returns:
        A list of ``len(sequence) - size + 1`` lower-cased substrings, or an
        empty list when *sequence* is shorter than ``size``.
    """
    return [
        sequence[start:start + size].lower()
        for start in range(len(sequence) - size + 1)
    ]
def classify_sequence(sequence):
    """Predict the class of a sequence string with the loaded model.

    The sequence is split into k-mer words with ``getKmers``, joined into one
    space-separated text, vectorized with the module-level ``cv`` and passed
    to the module-level ``model``.

    Args:
        sequence: The sequence as a string. Any whitespace (spaces, tabs,
            line breaks from a pasted multi-line sequence) is removed before
            the k-mers are extracted.

    Returns:
        A dict with two keys: ``'predicted_class'`` is the first element of
        ``model.predict`` and ``'probability'`` is the largest value of
        ``model.predict_proba`` multiplied by 100.
    """
    # Drop whitespace so that no k-mer contains a space or newline; the words
    # are joined with spaces below, so embedded whitespace would break them up.
    # Input without whitespace is passed through unchanged.
    cleaned = "".join(sequence.split())

    # Convert the sequence into k-mer words.
    words = getKmers(cleaned)

    # Join the k-mer words into a single space-separated text.
    text = ' '.join(words)

    # Vectorize the text; transform expects an iterable of documents.
    X = cv.transform([text])

    # Predict the label and take the highest class probability as a percentage.
    pred_label = model.predict(X)[0]
    pred_prob_percentage = model.predict_proba(X).max() * 100

    return {'predicted_class': pred_label, 'probability': pred_prob_percentage}
# Build the Gradio interface: one text input routed to classify_sequence and
# one text output for its result, then start the app.
iface = gr.Interface(
    fn=classify_sequence,
    inputs="text",
    outputs=["text"],
    title="Coronavirus Sequence Classifier",
    description="Enter a coronavirus sequence to predict its class and probability.",
)
iface.launch()