Spaces:
Runtime error
Runtime error
import gradio as gr | |
import os | |
import gc | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from collections import Counter | |
from sklearn.preprocessing import LabelEncoder | |
from keras.models import Model | |
from keras.regularizers import l2 | |
from keras.constraints import max_norm | |
from keras.utils import to_categorical | |
from keras.preprocessing.text import Tokenizer | |
from keras.utils import pad_sequences | |
from keras.callbacks import EarlyStopping | |
from keras.layers import Input, Dense, Dropout, Flatten, Activation | |
from keras.layers import Conv1D, Add, MaxPooling1D, BatchNormalization | |
from keras.layers import Embedding, Bidirectional, LSTM, CuDNNLSTM, GlobalMaxPooling1D | |
import tensorflow as tf | |
from huggingface_hub import hf_hub_url, cached_download | |
class Sequence:
    """Convert a raw amino-acid string into the one-hot array fed to the model."""

    # The 20 common amino-acid letters map to 1..20; 0 is left for padding
    # and for every character that is not in this table.
    codes = {c: i for i, c in enumerate("ACDEFGHIKLMNPQRSTVWY", start=1)}

    # Fixed sequence length; get_model() declares Input(shape=(100, 21)),
    # so the two presumably have to stay in sync.
    MAX_LENGTH = 100

    @classmethod
    def integer_encoding(cls, data):
        """
        - Encodes code sequence to integer values.
        - 20 common amino acids are taken into consideration
          and every other character is categorized as 0.

        data: iterable of single-character codes (e.g. a str).
        Returns a 1-D numpy array with one integer per character.
        """
        return np.array([cls.codes.get(code, 0) for code in data])

    @classmethod
    def prepare(cls, sequence):
        """
        Strip and upper-case `sequence`, integer-encode it, pad/truncate it at
        the end to MAX_LENGTH and one-hot encode the result.

        Returns an array of shape (1, MAX_LENGTH, len(codes) + 1).
        """
        sequence = sequence.strip().upper()
        ie = cls.integer_encoding(sequence)
        padded_ie = pad_sequences(
            [ie], maxlen=cls.MAX_LENGTH, padding='post', truncating='post'
        )
        # Ask for the category count explicitly instead of appending a dummy
        # row containing every index just to force 21 one-hot columns.
        return to_categorical(padded_ie, num_classes=len(cls.codes) + 1)
def residual_block(data, filters, d_rate):
    """
    Two BatchNorm -> ReLU -> Conv1D stages whose output is added back to the input.

    data: input tensor; it is also used unchanged as the skip branch
    filters: number of filters for both convolutions
    d_rate: dilation rate handed to the first (kernel size 1) convolution

    The final Add layer sums the convolved branch with `data`, so `data`
    presumably needs to already have `filters` channels.
    """
    # stage 1: normalise, activate, kernel-size-1 convolution (carries d_rate)
    branch = BatchNormalization()(data)
    branch = Activation('relu')(branch)
    branch = Conv1D(
        filters,
        1,
        dilation_rate=d_rate,
        padding='same',
        kernel_regularizer=l2(0.001),
    )(branch)

    # stage 2: normalise, activate, kernel-size-3 convolution
    branch = BatchNormalization()(branch)
    branch = Activation('relu')(branch)
    branch = Conv1D(
        filters,
        3,
        padding='same',
        kernel_regularizer=l2(0.001),
    )(branch)

    # skip connection
    return Add()([branch, data])
def get_model():
    """
    Build the 1-D convolutional classifier and load its weights.

    The network takes a (100, 21) input, applies an initial width-1
    convolution and two residual blocks, then max-pools, applies dropout,
    flattens and ends in a 1000-way softmax layer. The weights file
    `model2.h5` is fetched from the `jonathang/Protein_Family_CNN` repo on the
    Hugging Face Hub.

    Returns the compiled Keras model with weights loaded.
    """
    # cached_download(hf_hub_url(...)) is deprecated in huggingface_hub;
    # hf_hub_download is its replacement. Imported locally because the
    # file-level import list does not include it.
    from huggingface_hub import hf_hub_download

    # model
    x_input = Input(shape=(100, 21))

    # initial conv
    conv = Conv1D(128, 1, padding='same')(x_input)

    # per-residue representation
    res1 = residual_block(conv, 128, 2)
    res2 = residual_block(res1, 128, 3)

    x = MaxPooling1D(3)(res2)
    x = Dropout(0.5)(x)

    # softmax classifier
    x = Flatten()(x)
    x_output = Dense(1000, activation='softmax', kernel_regularizer=l2(0.0001))(x)

    model2 = Model(inputs=x_input, outputs=x_output)
    model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    weights = hf_hub_download(repo_id="jonathang/Protein_Family_CNN", filename="model2.h5")
    model2.load_weights(weights)
    return model2
# Build the network and load its weights once at module level; greet() reads
# this global for every prediction.
model = get_model()
def greet(seq):
    """
    Gradio handler: encode `seq`, run it through the global model and return a
    text report with the input, the encoded input and the raw prediction.
    """
    encoded = Sequence.prepare(seq)
    scores = model.predict(encoded)
    return (
        f"Input is {seq}.\n"
        f"Processed input is:\n{encoded}\n\n"
        f"Model makes prediction:\n{scores}"
    )
# Wrap greet() in a text-in / text-out Gradio interface and start serving it.
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()