finnstrom3693
committed on
Commit
•
ad1f612
1
Parent(s):
121d5a6
Update modeling-dev.py
Browse files
- modeling-dev.py +1 -1
modeling-dev.py
CHANGED
@@ -49,7 +49,7 @@ class MiniSunModel(tf.keras.Model):
|
|
49 |
super(MiniSunModel, self).build(input_shape)
|
50 |
|
51 |
def _build_decoder_block(self):
|
52 |
-
# Decoder block with multi-query attention and feed-forward layers, using
|
53 |
return [
|
54 |
layers.MultiHeadAttention(num_heads=self.config.num_attention_heads, key_dim=self.config.hidden_size,
|
55 |
kernel_initializer=initializers.he_normal(),
|
|
|
49 |
super(MiniSunModel, self).build(input_shape)
|
50 |
|
51 |
def _build_decoder_block(self):
|
52 |
+
# Decoder block with multi-query attention and feed-forward layers, using regularization
|
53 |
return [
|
54 |
layers.MultiHeadAttention(num_heads=self.config.num_attention_heads, key_dim=self.config.hidden_size,
|
55 |
kernel_initializer=initializers.he_normal(),
|