finnstrom3693 committed on
Commit
ad1f612
1 Parent(s): 121d5a6

Update modeling-dev.py

Browse files
Files changed (1) hide show
  1. modeling-dev.py +1 -1
modeling-dev.py CHANGED
@@ -49,7 +49,7 @@ class MiniSunModel(tf.keras.Model):
49
  super(MiniSunModel, self).build(input_shape)
50
 
51
  def _build_decoder_block(self):
52
- # Decoder block with multi-query attention and feed-forward layers, using RMSNorm and regularization
53
  return [
54
  layers.MultiHeadAttention(num_heads=self.config.num_attention_heads, key_dim=self.config.hidden_size,
55
  kernel_initializer=initializers.he_normal(),
 
49
  super(MiniSunModel, self).build(input_shape)
50
 
51
  def _build_decoder_block(self):
52
+ # Decoder block with multi-query attention and feed-forward layers, using regularization
53
  return [
54
  layers.MultiHeadAttention(num_heads=self.config.num_attention_heads, key_dim=self.config.hidden_size,
55
  kernel_initializer=initializers.he_normal(),