Spaces:

Pranjal-666
/

COVID_classify_sequence

Sleeping

"Model"

36358e5 over 1 year ago

1.51 kB

	import gradio as gr
	import numpy as np
	import pandas as pd
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.naive_bayes import MultinomialNB
	import pickle
	import sys

	def greet(name):
	    """Return a friendly greeting for ``name``.

	    Args:
	        name: Value to greet; it is formatted into the message as text.

	    Returns:
	        The string ``"Hello <name>!!"``.
	    """
	    return f"Hello {name}!!"

	# Load the pickled CountVectorizer from disk.
	# NOTE: unpickling can execute arbitrary code, so these .pkl files must be
	# trusted artifacts shipped with the app, never user-supplied uploads.
	# The `with` blocks make sure each file handle is closed after loading.
	with open('countVectTrain.pkl', 'rb') as cv_file:
	    cv = pickle.load(cv_file)

	# Load the pickled prediction model from disk.
	filename = 'corona_pred.pkl'
	with open(filename, 'rb') as model_file:
	    model = pickle.load(model_file)

	# Default k-mer length: 6, i.e. hexamer "words".
	kmer_size = 6


	def getKmers(sequence, size=kmer_size):
	    """Split ``sequence`` into overlapping, lower-cased k-mer words.

	    A window of ``size`` characters slides over the sequence one position
	    at a time, so a sequence of length ``n`` yields ``n - size + 1`` words.

	    Args:
	        sequence: The sequence string to split.
	        size: Length of each k-mer; defaults to ``kmer_size`` (6).

	    Returns:
	        A list of lower-cased k-mers in order of appearance. The list is
	        empty when the sequence is shorter than ``size``.

	    Raises:
	        ValueError: If ``size`` is smaller than 1. A zero or negative
	            window would otherwise silently produce meaningless words.
	    """
	    if size < 1:
	        raise ValueError(f"size must be a positive integer, got {size!r}")
	    window_count = len(sequence) - size + 1
	    return [
	        sequence[start:start + size].lower()
	        for start in range(window_count)
	    ]

	def classify_sequence(sequence):
	    """Predict the class of a sequence with the pre-loaded model.

	    The sequence is split into overlapping k-mer words with ``getKmers``,
	    joined into one space-separated string, vectorized with the
	    module-level ``cv`` and scored with the module-level ``model``.

	    Args:
	        sequence: The raw sequence string entered by the user.

	    Returns:
	        A dict with two keys: ``'predicted_class'`` (the label returned by
	        ``model.predict``) and ``'probability'`` (the largest value from
	        ``model.predict_proba``, scaled to a percentage). Both values are
	        plain Python objects rather than NumPy scalars.

	    Raises:
	        ValueError: If the sequence is too short to produce any k-mer.
	    """
	    # convert the input sequence into k-mer words
	    words = getKmers(sequence)
	    if not words:
	        # Without this check an empty document would be vectorized and the
	        # model would still report a class for it, which is misleading.
	        raise ValueError("sequence is too short to produce any k-mer")
	    # join the k-mer words into a single space-separated string
	    text = ' '.join(words)
	    # vectorize the text using Count Vectorization
	    X = cv.transform([text])
	    # make predictions using the pre-trained model
	    pred_label = model.predict(X)[0]
	    pred_prob_percentage = model.predict_proba(X).max() * 100
	    # NumPy scalars print as e.g. "np.float64(99.9)" inside a dict under
	    # NumPy >= 2, so convert to builtins before returning for display.
	    if hasattr(pred_label, "item"):
	        pred_label = pred_label.item()
	    # return the predicted class and probability
	    return {
	        'predicted_class': pred_label,
	        'probability': float(pred_prob_percentage),
	    }

	# Build the Gradio interface: one text input, one text output, with
	# classify_sequence as the callback.
	iface = gr.Interface(
	    fn=classify_sequence,
	    inputs="text",
	    outputs=["text"],
	    title="Coronavirus Sequence Classifier",
	    description="Enter a coronavirus sequence to predict its class and probability.",
	)

	# Start serving the app.
	iface.launch()