import tensorflow as tf
import keras
from keras import layers
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import auc, roc_curve

def positional_encoding(length, depth):
    # Standard sinusoidal positional encoding: sine channels in the first half,
    # cosine channels in the second half, returned as a (length, depth) float32 tensor.
    depth = depth / 2
    positions = np.arange(length)[:, np.newaxis]
    depths = np.arange(depth)[np.newaxis, :] / depth
    angle_rates = 1 / (10000**depths)
    angle_rads = positions * angle_rates
    pos_encoding = np.concatenate(
        [np.sin(angle_rads), np.cos(angle_rads)],
        axis=-1
    )
    return tf.cast(pos_encoding, dtype=tf.float32)
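
# Illustrative usage sketch (not part of the original module); the sizes below are
# arbitrary and only show the expected output shape of positional_encoding.
def _demo_positional_encoding():
    pe = positional_encoding(length=128, depth=64)
    print(pe.shape)  # (128, 64): 32 sine channels followed by 32 cosine channels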
# Token Embedding Layer and Positional Encoding
class TokenEmbedding(layers.Layer):
    def __init__(self, vocab_size, emb_dim, max_len, dropout=None, regularizer=None):
        super(TokenEmbedding, self).__init__()
        self.vocab_size = vocab_size
        self.emb_dim = emb_dim
        self.max_len = max_len
        # mask_zero=True treats token id 0 as padding and propagates a mask downstream.
        self.token_emb = layers.Embedding(
            self.vocab_size, self.emb_dim, mask_zero=True, embeddings_regularizer=regularizer
        )
        self.pos_enc = positional_encoding(self.max_len, self.emb_dim)
        self.dropout = dropout
        if self.dropout is not None:
            self.dropout_layer = layers.Dropout(self.dropout)

    def compute_mask(self, *args, **kwargs):
        return self.token_emb.compute_mask(*args, **kwargs)

    def call(self, x):
        length = tf.shape(x)[1]
        token_emb = self.token_emb(x)
        # Scale the embeddings by sqrt(emb_dim) before adding the positional encoding.
        token_emb *= tf.math.sqrt(tf.cast(self.emb_dim, tf.float32))
        token_emb = token_emb + self.pos_enc[tf.newaxis, :length, :]
        if self.dropout is not None:
            return self.dropout_layer(token_emb)
        else:
            return token_emb
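
# Illustrative usage sketch (not part of the original module); vocabulary size,
# embedding width and sequence length are assumed values for demonstration.
def _demo_token_embedding():
    emb = TokenEmbedding(vocab_size=1000, emb_dim=64, max_len=128, dropout=0.1)
    ids = tf.constant([[5, 9, 42, 0, 0]])  # one sequence, zero-padded
    out = emb(ids)
    print(out.shape)  # (1, 5, 64): scaled embeddings plus positional encodings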
class Encoder(layers.Layer):
    def __init__(
        self,
        vocab_size,
        maxlen,
        emb_dim,
        num_heads,
        ffn_dim,
        dropout=0.1,
        regularizer=None
    ):
        super(Encoder, self).__init__()
        self.vocab_size = vocab_size
        self.maxlen = maxlen
        self.emb_dim = emb_dim
        self.num_heads = num_heads
        self.ffn_dim = ffn_dim
        self.dropout = dropout
        self.regularizer = regularizer
        # In most attention implementations the query, key and value projections have no
        # bias term; the formula only multiplies by the weight matrices, hence use_bias=False.
        self.attn = layers.MultiHeadAttention(self.num_heads, self.emb_dim, use_bias=False, kernel_regularizer=self.regularizer)
        self.ffn_layer = keras.Sequential([
            layers.Dense(self.ffn_dim, activation='relu', kernel_regularizer=self.regularizer),
            layers.Dropout(self.dropout),
            layers.Dense(self.emb_dim, kernel_regularizer=self.regularizer)
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(self.dropout)
        self.dropout2 = layers.Dropout(self.dropout)

    def call(self, x):
        # Post-norm residual block: self-attention (with a causal mask) followed by a feed-forward network.
        attn_output = self.attn(query=x, key=x, value=x, use_causal_mask=True)
        x = self.layernorm1(x + self.dropout1(attn_output))
        ffn_output = self.ffn_layer(x)
        x = self.layernorm2(x + self.dropout2(ffn_output))
        return x
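
# Illustrative usage sketch (not part of the original module): a single Encoder block
# preserves the (batch, seq_len, emb_dim) shape. The sizes below are assumptions.
def _demo_encoder_block():
    block = Encoder(vocab_size=1000, maxlen=128, emb_dim=64, num_heads=4, ffn_dim=128)
    x = tf.random.normal((2, 10, 64))  # (batch, seq_len, emb_dim)
    y = block(x)
    print(y.shape)  # (2, 10, 64)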
class Transformer(keras.Model):
    def __init__(
        self,
        vocab_size,
        maxlen,
        emb_dim,
        num_heads,
        ffn_dim,
        num_classes,
        num_layers=1,
        dropout=0.1,
        regularizer=None
    ):
        super(Transformer, self).__init__()
        self.vocab_size = vocab_size
        self.maxlen = maxlen
        self.emb_dim = emb_dim
        self.num_heads = num_heads
        self.ffn_dim = ffn_dim
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.dropout = dropout
        self.regularizer = regularizer
        self.token_emb = TokenEmbedding(self.vocab_size, self.emb_dim, self.maxlen, self.dropout, self.regularizer)
        self.encoder_stack = keras.Sequential([
            Encoder(self.vocab_size, self.maxlen, self.emb_dim, self.num_heads, self.ffn_dim, self.dropout, self.regularizer)
            for _ in range(self.num_layers)
        ])
        self.average_pool = layers.GlobalAveragePooling1D()
        self.dropout_layer = layers.Dropout(self.dropout)
        self.clf_head = layers.Dense(self.num_classes, activation='softmax', kernel_regularizer=self.regularizer)

    def call(self, x):
        x = self.token_emb(x)
        x = self.encoder_stack(x)
        x = self.average_pool(x)
        x = self.dropout_layer(x)
        probs = self.clf_head(x)
        return probs
    # Adapted from my Deep Learning Week-5 assignment.
    def visualize_model(self, history):
        plt.figure(figsize=(14, 6))
        # Extract the metrics to visualize: collect every metric recorded during
        # training, skipping the val_* entries (they are plotted alongside below).
        metrics = []
        hist_metrics = history.history.keys()
        for item in hist_metrics:
            if item.startswith("val"):
                continue
            metrics.append(item)
        for indx, metric in enumerate(metrics):
            title = f'{metric}'
            legends = [metric]
            plt.subplot(1, 2, indx+1)  # assumes two training metrics (e.g. loss and accuracy)
            plt.plot(history.history[metric], label=metric, marker='o')
            val_metric = 'val_' + metric
            if val_metric in hist_metrics:
                title += f" vs {val_metric}"
                plt.plot(history.history[val_metric], label=val_metric, marker='^')
                legends.append(val_metric)
            plt.legend(legends)
            plt.title(title)
        plt.show()
    def preds(self, dataset: tf.data.Dataset):
        # Run the model over a dataset of (inputs, one-hot labels) and return the
        # one-hot arrays along with their argmax class indices.
        y_true = []
        y_pred = []
        dataset_len = len(dataset)
        for inp, label in dataset.take(dataset_len):
            pred = self.call(inp).numpy()
            y_true.extend(label.numpy())
            y_pred.extend(pred)
        y_true = np.array(y_true)
        y_pred = np.array(y_pred)
        y_true_label = np.argmax(y_true, axis=-1)
        y_pred_label = np.argmax(y_pred, axis=-1)
        return y_true, y_true_label, y_pred, y_pred_label
    def plot_confusion_matrix(self, conf_matrix, labels):
        plt.figure(figsize=(8, 6))
        plt.title("Confusion Matrix", {'size': 14})
        sns.heatmap(conf_matrix, annot=True, fmt='d', xticklabels=labels, yticklabels=labels)
        plt.xlabel("Predicted", {'size': 12})
        plt.ylabel("Actual", {'size': 12})
        plt.show()
    def plot_roc_curve(self, y_true, y_pred, labels):
        # One-vs-rest ROC curve per class plus a micro-averaged curve.
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i, label in enumerate(labels):
            fpr[label], tpr[label], _ = roc_curve(y_true[:, i], y_pred[:, i])
            roc_auc[label] = auc(fpr[label], tpr[label])
        fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), y_pred.ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
        plt.figure(figsize=(6, 6))
        plt.title("ROC Curve", {'size': 14})
        plt.plot(fpr["micro"], tpr["micro"], label=f"ROC micro-avg area({roc_auc['micro']*100:.1f}%)")
        for label in labels:
            plt.plot(fpr[label], tpr[label], label=f"ROC {label} area({roc_auc[label]*100:.1f}%)")
        plt.plot([0, 1], [0, 1], 'k--', label='No Skill')
        plt.xlim([-0.05, 1.05])
        plt.ylim([-0.05, 1.05])
        plt.xlabel("False Positive Rate")
        plt.ylabel("True Positive Rate")
        plt.grid()
        plt.legend(loc="lower right")
        plt.show()
    def get_config(self):
        base_config = super().get_config()
        config = {
            "vocab_size": self.vocab_size,
            "maxlen": self.maxlen,
            "emb_dim": self.emb_dim,
            "num_heads": self.num_heads,
            "ffn_dim": self.ffn_dim,
            "num_classes": self.num_classes,
            "num_layers": self.num_layers,
            "dropout": self.dropout,
            "regularizer": self.regularizer
        }
        return {**base_config, **config}
    @classmethod
    def from_config(cls, config):
        vocab_size = config.pop("vocab_size")
        maxlen = config.pop("maxlen")
        emb_dim = config.pop("emb_dim")
        num_heads = config.pop("num_heads")
        ffn_dim = config.pop("ffn_dim")
        num_classes = config.pop("num_classes")
        num_layers = config.pop("num_layers")
        dropout = config.pop("dropout")
        regularizer = config.pop("regularizer")
        return cls(vocab_size, maxlen, emb_dim, num_heads, ffn_dim, num_classes,
                   num_layers, dropout, regularizer)
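
# Illustrative usage sketch (not part of the original module): build the classifier with
# assumed hyperparameters and run a forward pass on random token ids.
def _demo_transformer():
    model = Transformer(
        vocab_size=20000, maxlen=200, emb_dim=32,
        num_heads=2, ffn_dim=64, num_classes=2, num_layers=1
    )
    dummy_ids = tf.random.uniform((4, 200), minval=1, maxval=20000, dtype=tf.int32)
    probs = model(dummy_ids)
    print(probs.shape)  # (4, 2); each row is a softmax distribution over the classes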

def get_model(filepath):
    return keras.models.load_model(filepath)
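
# Illustrative usage sketch (not part of the original module), assuming the trained model
# was previously saved with `model.save(filepath)`. Depending on the Keras version and the
# save format, the custom classes may need to be registered or passed via `custom_objects`
# when reloading. The filename below is hypothetical.
def _demo_reload(filepath="transformer_classifier.keras"):
    restored = get_model(filepath)
    return restored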