# Spaces: Running
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
from matplotlib.figure import Figure
from transformers import AutoModel, AutoTokenizer, AutoModelForTokenClassification
# The tokenizer and the token-classification model are both read from the
# same local checkpoint directory.
CHECKPOINT_DIR = "./checkpoint-final/"

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_DIR)
# Put the model in evaluation mode immediately after loading.
model = AutoModelForTokenClassification.from_pretrained(CHECKPOINT_DIR).eval()

# Sample input handed to the interface via ``examples=``: a list of rows,
# each row holding the value for the single text input.
examples = [
    [
        "GSHMSDNEDNFDGDDFDDVEEDEGLDDLENAEEEGQENVEILPSGERPQANQKRITTPYMTKYERARVLGTRALQIAMCAPVMVELEGETDPLLIAMKELKARKIPIIIRRYLPDGSYEDWGVDELIITD",
    ],
]
def get_out(sent):
    """Score a protein sequence with the loaded model and plot the result.

    Args:
        sent: Amino-acid sequence as a string. Only the first 1022
            characters are scored; longer input is truncated and a notice
            is prepended to the returned text.

    Returns:
        A ``(figure, text)`` tuple. ``figure`` is a Matplotlib figure
        plotting the per-position score against sequence position.
        ``text`` is the same scores joined with commas, preceded by the
        truncation notice when the input was shortened.
    """
    # NOTE(review): 1022 is presumably the model's position limit minus the
    # two special tokens stripped below -- confirm against the checkpoint.
    max_len = 1022

    prefix = ""
    if len(sent) > max_len:
        sent = sent[:max_len]
        prefix = (
            f"Your protein was longer than {max_len} AAs. We are working on "
            "including longer sequences but in the meantime, here are the "
            f"scores for the first {max_len} AAs: \n "
        )
    print(sent)

    encoded = tokenizer.encode_plus(sent, return_tensors="pt")
    with torch.no_grad():
        logits = model(**encoded)["logits"]
        # Select the single batch item explicitly (an axis-less squeeze would
        # drop every size-1 dimension) and normalise over the class axis;
        # softmax without ``dim`` is deprecated.
        probs = F.softmax(logits[0], dim=-1)
    # Drop the first and last positions (presumably the special tokens the
    # tokenizer adds -- confirm) and keep the class-1 probability, which the
    # app reports as the disorder score.
    scores = probs[1:-1, 1].numpy()

    # Build the figure without pyplot's global registry: figures made with
    # ``plt.figure()`` stay alive until closed and would accumulate over the
    # lifetime of the server, one per request.
    fig = Figure()
    ax = fig.subplots()
    ax.plot(scores)
    ax.tick_params(axis="both", labelsize=15)
    ax.set_xlabel('Sequence position', fontsize=15)
    ax.set_ylabel('DR-BERT score', fontsize=15)

    return (fig, prefix + ','.join(str(x) for x in scores))
# Single text box for the sequence; the outputs are a plot and a text field,
# matching the (figure, text) tuple returned by ``get_out``.
sequence_box = gr.components.Textbox(
    label="Input Amino Acid Sequence",
    placeholder=" Amino acid sequence here ...",
)

demo = gr.Interface(
    fn=get_out,
    inputs=[sequence_box],
    outputs=["plot", "text"],
    examples=examples,
    title="DR-BERT: A Protein Language Model to Predict Disordered Regions",
    description="The app uses DR-BERT to predict disordered regions in proteins. Outputs generated are the probability that a residue is disordered.",
)

# Start the web server as soon as the module is executed.
demo.launch()