add t5x checkpoint
This view is limited to 50 files because it contains too many changes.
- .gitattributes +2 -0
- t5x/checkpoint_1400000/checkpoint +3 -0
- t5x/checkpoint_1400000/state.param_states.decoder.decoder_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.decoder_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.decoder.relpos_bias.rel_embedding.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.decoder.relpos_bias.rel_embedding.v/0.0 +3 -0
- t5x/checkpoint_1400000/state.param_states.encoder.encoder_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.encoder.encoder_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1400000/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1400000/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/0 +0 -0
.gitattributes
CHANGED
@@ -33,4 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 
+*.0 filter=lfs diff=lfs merge=lfs -text
+checkpoint filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
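With these two new patterns, the checkpoint index file and chunk files such as 0.0 are stored through Git LFS, so a fresh clone contains small text pointers until the objects are pulled; note that the one-dimensional chunks named plain 0 do not match *.0 and are committed as ordinary binary files, as seen below. A minimal Python sketch (the helper name and example path are illustrative, not part of this repository) for checking whether a checked-out file is still an un-pulled pointer:

def is_lfs_pointer(path):
    # Git LFS pointer files always begin with this fixed version line;
    # a pulled file starts with its actual (binary) content instead.
    with open(path, "rb") as f:
        head = f.read(64)
    return head.startswith(b"version https://git-lfs.github.com/spec/v1")

print(is_lfs_pointer("t5x/checkpoint_1400000/checkpoint"))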
t5x/checkpoint_1400000/checkpoint
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8499eb03904c58e960019c0d7b26ba3a20f8406d91ac0cb9ef139129e1972f8e
+size 2799177
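The pointer above records only the object's SHA-256 and size in bytes; the actual ~2.8 MB checkpoint index is fetched with git lfs pull. A small sketch, assuming the file has already been pulled to the path below, that re-checks the download against the pointer:

import hashlib
import os

path = "t5x/checkpoint_1400000/checkpoint"  # local path after `git lfs pull`
expected_oid = "8499eb03904c58e960019c0d7b26ba3a20f8406d91ac0cb9ef139129e1972f8e"
expected_size = 2799177

assert os.path.getsize(path) == expected_size, "size mismatch"
with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()
assert digest == expected_oid, "sha256 mismatch"
print("checkpoint content matches its LFS pointer")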
t5x/checkpoint_1400000/state.param_states.decoder.decoder_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.decoder_norm.scale.v/0
ADDED
Binary file (5.33 kB)
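Each of these per-parameter directories is a self-contained zarr v2 store: the .zarray file carries the metadata shown above and the neighbouring 0 file is the single gzip-compressed chunk. A sketch of reading one back, assuming the repository has been cloned with LFS content pulled and a zarr version that can still read zarr_format 2 stores:

import zarr

# Directory holding the .zarray metadata plus the chunk file "0".
store = "t5x/checkpoint_1400000/state.param_states.decoder.decoder_norm.scale.v"
arr = zarr.open(store, mode="r")
print(arr.shape, arr.dtype)  # expected: (1536,) float32
print(arr[:5])               # the gzip chunk is decompressed on read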
t5x/checkpoint_1400000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (5.49 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (5.4 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (5.58 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (5.3 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (5.27 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (5.38 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (5.27 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (5.21 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (5.3 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (5.39 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (5.24 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (5.33 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (5.42 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (5.4 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (5.32 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (5.36 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (5.28 kB)
t5x/checkpoint_1400000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (5.36 kB)
t5x/checkpoint_1400000/state.param_states.decoder.relpos_bias.rel_embedding.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[12,32],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[12,32],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.decoder.relpos_bias.rel_embedding.v/0.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a00ec0b745067c243f7f91584c176fc06bd7d96a7b5d22c94b6aa40b24134228
+size 1489
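Unlike the one-dimensional scale.v arrays, this chunk is named 0.0 and therefore matches the new *.0 pattern, so it is stored through LFS (1,489 bytes compressed; per the .zarray metadata it decompresses to a 12x32 little-endian float32 array, i.e. 1,536 raw bytes). A dependency-light sketch of decoding the chunk directly with gzip and numpy, without the zarr library, assuming the LFS object has been pulled:

import gzip
import numpy as np

chunk = "t5x/checkpoint_1400000/state.param_states.decoder.relpos_bias.rel_embedding.v/0.0"
with open(chunk, "rb") as f:
    raw = gzip.decompress(f.read())  # compressor: gzip, level 1
# dtype, shape and C order come from the accompanying .zarray file
v = np.frombuffer(raw, dtype="<f4").reshape(12, 32)
print(v.shape, float(v.mean()))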
t5x/checkpoint_1400000/state.param_states.encoder.encoder_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.encoder.encoder_norm.scale.v/0
ADDED
Binary file (5.31 kB)
t5x/checkpoint_1400000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0
ADDED
Binary file (5.57 kB)
t5x/checkpoint_1400000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (5.51 kB)
t5x/checkpoint_1400000/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1400000/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/0
ADDED
Binary file (5.56 kB)
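Taken together, the files in this commit form a standard T5X checkpoint directory: the index file named checkpoint plus one Tensorstore/zarr array per parameter and per optimizer state slot (the .v entries shown here). A heavily hedged sketch of restoring it as a nested dictionary, assuming a working t5x installation that exposes checkpoints.load_t5x_checkpoint:

from t5x import checkpoints  # assumes t5x is installed

ckpt_dir = "t5x/checkpoint_1400000"
state_dict = checkpoints.load_t5x_checkpoint(ckpt_dir)

# The restored pytree mirrors the file layout above, e.g. something like
# state_dict["state"]["param_states"]["decoder"]["decoder_norm"]["scale"]["v"].
print(list(state_dict.keys()))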