ayush-mila commited on
Commit
24dceeb
·
verified ·
1 Parent(s): 53a7b95

Add files using upload-large-folder tool

Browse files
Files changed (49) hide show
  1. 65536_390Mp_300Bt/step76000/configuration_spectra2.py +174 -0
  2. data-indices/rank106.tsv.gz +3 -0
  3. data-indices/rank125.tsv.gz +3 -0
  4. data-indices/rank133.tsv.gz +3 -0
  5. data-indices/rank165.tsv.gz +3 -0
  6. data-indices/rank167.tsv.gz +3 -0
  7. data-indices/rank17.tsv.gz +3 -0
  8. data-indices/rank171.tsv.gz +3 -0
  9. data-indices/rank192.tsv.gz +3 -0
  10. data-indices/rank205.tsv.gz +3 -0
  11. data-indices/rank208.tsv.gz +3 -0
  12. data-indices/rank22.tsv.gz +3 -0
  13. data-indices/rank232.tsv.gz +3 -0
  14. data-indices/rank253.tsv.gz +3 -0
  15. data-indices/rank39.tsv.gz +3 -0
  16. data-indices/rank57.tsv.gz +3 -0
  17. data-indices/rank8.tsv.gz +3 -0
  18. data-indices/rank81.tsv.gz +3 -0
  19. data-indices/rank97.tsv.gz +3 -0
  20. step10000-unsharded/train.pt +3 -0
  21. step100000-unsharded/train.pt +3 -0
  22. step102000-unsharded/train.pt +3 -0
  23. step104000-unsharded/train.pt +3 -0
  24. step108000-unsharded/train.pt +3 -0
  25. step114000-unsharded/config.yaml +465 -0
  26. step12000-unsharded/train.pt +3 -0
  27. step128000-unsharded/train.pt +3 -0
  28. step14000-unsharded/train.pt +3 -0
  29. step142000-unsharded/config.yaml +465 -0
  30. step144000-unsharded/train.pt +3 -0
  31. step146000-unsharded/config.yaml +465 -0
  32. step150000-unsharded/train.pt +3 -0
  33. step150000/config.yaml +465 -0
  34. step150000/rank4.pt +3 -0
  35. step150010-unsharded/config.yaml +465 -0
  36. step16000-unsharded/train.pt +3 -0
  37. step2000-unsharded/train.pt +3 -0
  38. step28000-unsharded/config.yaml +465 -0
  39. step38000-unsharded/train.pt +3 -0
  40. step40000-unsharded/train.pt +3 -0
  41. step44000-unsharded/train.pt +3 -0
  42. step46000-unsharded/train.pt +3 -0
  43. step50000-unsharded/config.yaml +465 -0
  44. step52000-unsharded/train.pt +3 -0
  45. step54000-unsharded/config.yaml +465 -0
  46. step66000-unsharded/train.pt +3 -0
  47. step68000-unsharded/train.pt +3 -0
  48. step78000-unsharded/train.pt +3 -0
  49. step98000-unsharded/train.pt +3 -0
65536_390Mp_300Bt/step76000/configuration_spectra2.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 EleutherAI and the HuggingFace Inc. team. All rights reserved.
3
+ #
4
+ # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
5
+ # and OPT implementations in this library. It has been modified from its
6
+ # original forms to accommodate minor architectural differences compared
7
+ # to GPT-NeoX and OPT used by the Meta AI team that trained the model.
8
+ #
9
+ # Licensed under the Apache License, Version 2.0 (the "License");
10
+ # you may not use this file except in compliance with the License.
11
+ # You may obtain a copy of the License at
12
+ #
13
+ # http://www.apache.org/licenses/LICENSE-2.0
14
+ #
15
+ # Unless required by applicable law or agreed to in writing, software
16
+ # distributed under the License is distributed on an "AS IS" BASIS,
17
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18
+ # See the License for the specific language governing permissions and
19
+ # limitations under the License.
20
+ """Spectra2 model configuration"""
21
+
22
+ from transformers.configuration_utils import PretrainedConfig
23
+ from transformers.utils import logging
24
+
25
+
26
+ logger = logging.get_logger(__name__)
27
+
28
+
29
+ class Spectra2Config(PretrainedConfig):
30
+ r"""
31
+ This is the configuration class to store the configuration of a [`Spectra2Model`]. It is used to instantiate an Spectra2
32
+ model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
33
+ defaults will yield a similar configuration to that of the [SpectraSuite/Spectra2-3B-base](https://huggingface.co/spectrasuite/Spectra2-3B-base).
34
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
35
+ documentation from [`PretrainedConfig`] for more information.
36
+ Args:
37
+ vocab_size (`int`, *optional*, defaults to 50304):
38
+ Vocabulary size of the Spectra2 model. Defines the number of different tokens that can be represented by the
39
+ `inputs_ids` passed when calling [`Spectra2Model`]
40
+ hidden_size (`int`, *optional*, defaults to 4096):
41
+ Dimension of the hidden representations.
42
+ intermediate_size (`int`, *optional*, defaults to 11008):
43
+ Dimension of the MLP representations.
44
+ num_hidden_layers (`int`, *optional*, defaults to 32):
45
+ Number of hidden layers in the Transformer decoder.
46
+ num_attention_heads (`int`, *optional*, defaults to 32):
47
+ Number of attention heads for each attention layer in the Transformer decoder.
48
+ num_key_value_heads (`int`, *optional*):
49
+ This is the number of key_value heads that should be used to implement Grouped Query Attention. If
50
+ `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
51
+ `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
52
+ converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
53
+ by meanpooling all the original heads within that group. For more details checkout [this
54
+ paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
55
+ `num_attention_heads`.
56
+ hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
57
+ The non-linear activation function (function or string) in the decoder.
58
+ max_position_embeddings (`int`, *optional*, defaults to 2048):
59
+ The maximum sequence length that this model might ever be used with.
60
+ initializer_range (`float`, *optional*, defaults to 0.02):
61
+ The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
62
+ use_cache (`bool`, *optional*, defaults to `True`):
63
+ Whether or not the model should return the last key/values attentions (not used by all models). Only
64
+ relevant if `config.is_decoder=True`.
65
+ pad_token_id (`int`, *optional*, defaults to 1):
66
+ Padding token id.
67
+ bos_token_id (`int`, *optional*):
68
+ Beginning of stream token id.
69
+ eos_token_id (`int`, *optional*, defaults to 50279):
70
+ End of stream token id.
71
+ tie_word_embeddings (`bool`, *optional*, defaults to `False`):
72
+ Whether to tie weight embeddings
73
+ rope_theta (`float`, *optional*, defaults to 10000.0):
74
+ The base period of the RoPE embeddings.
75
+ rope_scaling (`Dict`, *optional*):
76
+ Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling
77
+ strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format is
78
+ `{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update
79
+ `max_position_embeddings` to the expected new maximum. See the following thread for more information on how
80
+ these scaling strategies behave:
81
+ https://www.reddit.com/r/LocalLLaMA/comments/14mrgpr/dynamically_scaled_rope_further_increases/. This is an
82
+ experimental feature, subject to breaking API changes in future versions.
83
+ attention_bias (`bool`, defaults to `False`, *optional*, defaults to `False`):
84
+ Whether to use a bias in the query, key, value and output projection layers during self-attention.
85
+ attention_dropout (`float`, *optional*, defaults to 0.0):
86
+ The dropout ratio for the attention probabilities.
87
+ clip_qkv (`float`, *optional*):
88
+ If not `None`, elements of query, key and value attention states are clipped so that their
89
+ absolute value does not exceed this value.
90
+ ```python
91
+ >>> from transformers import Spectra2Model, Spectra2Config
92
+ >>> # Initializing a Spectra2 3B style configuration
93
+ >>> configuration = Spectra2Config()
94
+ >>> # Initializing a model from the Spectra2 3B style configuration
95
+ >>> model = Spectra2Model(configuration)
96
+ >>> # Accessing the model configuration
97
+ >>> configuration = model.config
98
+ ```"""
99
+
100
+ model_type = "spectra2"
101
+ keys_to_ignore_at_inference = ["past_key_values"]
102
+
103
+ def __init__(
104
+ self,
105
+ vocab_size=50304,
106
+ hidden_size=4096,
107
+ intermediate_size=11008,
108
+ num_hidden_layers=32,
109
+ num_attention_heads=32,
110
+ num_key_value_heads=None,
111
+ hidden_act="silu",
112
+ max_position_embeddings=2048,
113
+ initializer_range=0.02,
114
+ use_cache=True,
115
+ pad_token_id=1,
116
+ bos_token_id=None,
117
+ eos_token_id=50279,
118
+ tie_word_embeddings=False,
119
+ rope_theta=10000.0,
120
+ rope_scaling=None,
121
+ attention_bias=False,
122
+ attention_dropout=0.0,
123
+ clip_qkv=None,
124
+ **kwargs,
125
+ ):
126
+ self.vocab_size = vocab_size
127
+ self.max_position_embeddings = max_position_embeddings
128
+ self.hidden_size = hidden_size
129
+ self.intermediate_size = intermediate_size
130
+ self.num_hidden_layers = num_hidden_layers
131
+ self.num_attention_heads = num_attention_heads
132
+
133
+ # for backward compatibility
134
+ if num_key_value_heads is None:
135
+ num_key_value_heads = num_attention_heads
136
+
137
+ self.num_key_value_heads = num_key_value_heads
138
+ self.hidden_act = hidden_act
139
+ self.initializer_range = initializer_range
140
+ self.use_cache = use_cache
141
+ self.rope_theta = rope_theta
142
+ self.rope_scaling = rope_scaling
143
+ self._rope_scaling_validation()
144
+ self.attention_bias = attention_bias
145
+ self.attention_dropout = attention_dropout
146
+ self.clip_qkv = clip_qkv
147
+
148
+ super().__init__(
149
+ pad_token_id=pad_token_id,
150
+ bos_token_id=bos_token_id,
151
+ eos_token_id=eos_token_id,
152
+ tie_word_embeddings=tie_word_embeddings,
153
+ **kwargs,
154
+ )
155
+
156
+ def _rope_scaling_validation(self):
157
+ """
158
+ Validate the `rope_scaling` configuration.
159
+ """
160
+ if self.rope_scaling is None:
161
+ return
162
+
163
+ if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2:
164
+ raise ValueError(
165
+ "`rope_scaling` must be a dictionary with two fields, `type` and `factor`, " f"got {self.rope_scaling}"
166
+ )
167
+ rope_scaling_type = self.rope_scaling.get("type", None)
168
+ rope_scaling_factor = self.rope_scaling.get("factor", None)
169
+ if rope_scaling_type is None or rope_scaling_type not in ["linear", "dynamic"]:
170
+ raise ValueError(
171
+ f"`rope_scaling`'s type field must be one of ['linear', 'dynamic'], got {rope_scaling_type}"
172
+ )
173
+ if rope_scaling_factor is None or not isinstance(rope_scaling_factor, float) or rope_scaling_factor <= 1.0:
174
+ raise ValueError(f"`rope_scaling`'s factor field must be a float > 1, got {rope_scaling_factor}")
data-indices/rank106.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d30d212292517f657c7ff763e0fdbf0b4ead713feccf6bb043ebb5f5b792df5
3
+ size 64292
data-indices/rank125.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d848f5d5f188a6d38c95bc7f47d9cd936ce5a27f08cfdf21a5d250e4fcb237d
3
+ size 64293
data-indices/rank133.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b66e318901cf6608122de8d46c569576c3ad1af9467e41a79e43d05bc82f89f
3
+ size 64153
data-indices/rank165.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6839ad6c56a2886ea38800e3df935a91a070e958c785eee495c7ee014d4b9efc
3
+ size 64085
data-indices/rank167.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ee4a25ad0bd39b862274a774da0b023e5cafc64c5506ec8cd9b7c82c710b09f
3
+ size 64200
data-indices/rank17.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc859b14e127f53f9136bd73e7891d6b6d6b0cf1674175f950048785570edde3
3
+ size 64132
data-indices/rank171.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40dc1f946780371f8a44b78d790afe4526158ec94e40f91f880f2a8a49070c8b
3
+ size 64102
data-indices/rank192.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:135e17dc51e6b37c74d4f73194b0a129730992f0f14a4ec5f9aef4373bc85bf1
3
+ size 64187
data-indices/rank205.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c511a30427d1cc4faf5a9eadca8833c1b06e1462a1a3839e54568f85aca1d2c
3
+ size 64164
data-indices/rank208.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c0d49cc17b002f7b61956d12ccfc55a4824e1968ba9fb45144d7b7848eb984d
3
+ size 64167
data-indices/rank22.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c28ac3c514b13afc2096bff16b1b1c6f244d71254c94a3551914bb04cbb4fc6
3
+ size 64216
data-indices/rank232.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e8497be100a87a2850d54b63aae6ee671c223296ee5e292a6eb1dbd4f6d5dc5
3
+ size 64236
data-indices/rank253.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9d746b9a85742fa1e28deb6e8a66ac65066a82584053d8f7ddb039142be611e
3
+ size 64163
data-indices/rank39.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9385f7b6c27ecf17a69069f50ba9c9e4a3c08243884b418516b7b296ff764cd0
3
+ size 64163
data-indices/rank57.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a2b5e731fe6f4ab666fbd044bbd9c69066db84204407158c455e3e05ca6a431
3
+ size 64225
data-indices/rank8.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75815b5e8ba0cd22ba92d1041aa35fe533bda76ad7a8829b2e3853f67eeb5b7a
3
+ size 64193
data-indices/rank81.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da6d94d3ca055bfa35cf9f3eb0b56f10f9a3baf013b9f0c882042d873b7291b0
3
+ size 64143
data-indices/rank97.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ec88570363f1f7c329bc732fd567cedd49d87bc5d834d242c3ab8131305f554
3
+ size 64224
step10000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:badac51cef285c6a1138d03813e72699c5b098a55e40ae76d37958f7d8a90bb1
3
+ size 14668
step100000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aff47706d8099531eed41ac0be67476bdcc37450c640d3d1e0d41b82ef5da77
3
+ size 16076
step102000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c6d3f452ab807faa70fd29b1b66297f51350732d605a8a9933e393dae7f373e
3
+ size 16076
step104000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fad02ae82b3833cb2ab5d08c0c0a76f4918a6fa17ab9313cd47feeb3ba9ab2c9
3
+ size 16140
step108000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f94de21e15fba3505bd19ecb67848f1336229a7ca436b1abfce9e0a95cd6e65c
3
+ size 16204
step114000-unsharded/config.yaml ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: floatlm_390M_300B
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 1024
7
+ n_heads: 16
8
+ n_kv_heads: null
9
+ clip_qkv: null
10
+ n_layers: 24
11
+ mlp_ratio: 4
12
+ mlp_hidden_size: 5120
13
+ activation_type: swiglu
14
+ block_type: sequential
15
+ block_group_size: 1
16
+ alibi: false
17
+ alibi_bias_max: 8.0
18
+ rope: true
19
+ rope_full_precision: true
20
+ rope_theta: 10000
21
+ flash_attention: false
22
+ attention_dropout: 0.0
23
+ multi_query_attention: false
24
+ attention_layer_norm: false
25
+ residual_dropout: 0.0
26
+ embedding_dropout: 0.0
27
+ embedding_layer_norm: false
28
+ layer_norm_type: rms
29
+ layer_norm_with_affine: true
30
+ layer_norm_eps: 1.0e-05
31
+ attention_layer_norm_with_affine: false
32
+ max_sequence_length: 2048
33
+ include_bias: false
34
+ bias_for_layer_norm: false
35
+ scale_logits: false
36
+ vocab_size: 50277
37
+ embedding_size: 50304
38
+ weight_tying: false
39
+ eos_token_id: 50277
40
+ pad_token_id: 1
41
+ init_device: meta
42
+ init_fn: mitchell
43
+ init_std: 0.02
44
+ init_cutoff_factor: null
45
+ precision: amp_bf16
46
+ scale_emb_init: false
47
+ emb_init_std: null
48
+ norm_after: false
49
+ linear_type: floatlm
50
+ num_trilm_matrix_scales: 0
51
+ optimizer:
52
+ name: adamw
53
+ learning_rate: 0.0003
54
+ weight_decay: 0.1
55
+ betas:
56
+ - 0.9
57
+ - 0.95
58
+ eps: 1.0e-05
59
+ no_decay_norm_and_bias: null
60
+ selective_updates: false
61
+ decay_norm_and_bias: false
62
+ decay_embeddings: false
63
+ metrics_log_interval: 10
64
+ record_update_metrics: false
65
+ scheduler:
66
+ name: cosine_with_warmup
67
+ units: steps
68
+ t_warmup: 375
69
+ t_max: null
70
+ alpha_f: 0.1
71
+ grad_clip_warmup_steps: null
72
+ grad_clip_warmup_factor: null
73
+ warmup_min_lr: null
74
+ remove_weight_decay_in_second_half: false
75
+ data:
76
+ paths:
77
+ - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
78
+ - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
79
+ - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
80
+ - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
81
+ - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
82
+ - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
83
+ - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
84
+ - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
85
+ - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
86
+ - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
87
+ - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
88
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
89
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
90
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
91
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
92
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
93
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
94
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
95
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
96
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
97
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
98
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
99
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
100
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
101
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
102
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
103
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
104
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
105
+ - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
106
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
107
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
108
+ - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
109
+ - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
110
+ - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
111
+ - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
112
+ - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
113
+ - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
114
+ - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
115
+ - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
116
+ - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
117
+ - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
118
+ - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
119
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
120
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
121
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
122
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
123
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
124
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
125
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
126
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
127
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
128
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
129
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
130
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
131
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
132
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
133
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
134
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
135
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
136
+ - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
137
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
138
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
139
+ - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
140
+ - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
141
+ - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
142
+ - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
143
+ - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
144
+ - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
145
+ - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
146
+ - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
147
+ - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
148
+ - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
149
+ - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
150
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
151
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
152
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
153
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
154
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
155
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
156
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
157
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
158
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
159
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
160
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
161
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
162
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
163
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
164
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
165
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
166
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
167
+ - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
168
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
169
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
170
+ - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
171
+ - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
172
+ - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
173
+ - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
174
+ - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
175
+ - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
176
+ - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
177
+ - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
178
+ - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
179
+ - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
180
+ - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
181
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
182
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
183
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
184
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
185
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
186
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
187
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
188
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
189
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
190
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
191
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
192
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
193
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
194
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
195
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
196
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
197
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
198
+ - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
199
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
200
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
201
+ - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
202
+ - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
203
+ - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
204
+ - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
205
+ - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
206
+ - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
207
+ - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
208
+ - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
209
+ - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
210
+ - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
211
+ - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
212
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
213
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
214
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
215
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
216
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
217
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
218
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
219
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
220
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
221
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
222
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
223
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
224
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
225
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
226
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
227
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
228
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
229
+ - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
230
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
231
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
232
+ - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
233
+ - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
234
+ - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
235
+ - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
236
+ - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
237
+ - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
238
+ - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
239
+ - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
240
+ - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
241
+ - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
242
+ - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
243
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
244
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
245
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
246
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
247
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
248
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
249
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
250
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
251
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
252
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
253
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
254
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
255
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
256
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
257
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
258
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
259
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
260
+ - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
261
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
262
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
263
+ - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
264
+ - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
265
+ - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
266
+ - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
267
+ - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
268
+ - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
269
+ - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
270
+ - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
271
+ - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
272
+ - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
273
+ - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
274
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
275
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
276
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
277
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
278
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
279
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
280
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
281
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
282
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
283
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
284
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
285
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
286
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
287
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
288
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
289
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
290
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
291
+ - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
292
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
293
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
294
+ - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
295
+ - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
296
+ - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
297
+ - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
298
+ - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
299
+ - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
300
+ - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
301
+ - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
302
+ - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
303
+ - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
304
+ - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
305
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
306
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
307
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
308
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
309
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
310
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
311
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
312
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
313
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
314
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
315
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
316
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
317
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
318
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
319
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
320
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
321
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
322
+ - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
323
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
324
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
325
+ - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
326
+ - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
327
+ - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
328
+ - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
329
+ - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
330
+ - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
331
+ - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
332
+ - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
333
+ - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
334
+ - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
335
+ - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
336
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
337
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
338
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
339
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
340
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
341
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
342
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
343
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
344
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
345
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
346
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
347
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
348
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
349
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
350
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
351
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
352
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
353
+ - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
354
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
355
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
356
+ - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
357
+ - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
358
+ - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
359
+ - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
360
+ - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
361
+ - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
362
+ - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
363
+ - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
364
+ - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
365
+ - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
366
+ - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
367
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
368
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
369
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
370
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
371
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
372
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
373
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
374
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
375
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
376
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
377
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
378
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
379
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
380
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
381
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
382
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
383
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
384
+ - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
385
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
386
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
387
+ memmap_dtype: uint16
388
+ datasets: null
389
+ label_mask_paths: null
390
+ pad_direction: right
391
+ generate_attention_mask: false
392
+ generate_doc_lengths: false
393
+ num_workers: 0
394
+ drop_last: true
395
+ pin_memory: true
396
+ prefetch_factor: 16
397
+ persistent_workers: true
398
+ timeout: 0
399
+ seed: null
400
+ instance_filter: null
401
+ restore_dataloader: true
402
+ fast_forward_batches: null
403
+ evaluators: []
404
+ eval_interval: 500
405
+ tokenizer:
406
+ identifier: ../spectra_tokenizer/tokenizer.json
407
+ truncate_direction: right
408
+ save_folder: checkpoints/floatlm_390M_300B/
409
+ remote_save_folder: null
410
+ canceled_check_interval: 50
411
+ save_interval: 500
412
+ save_interval_unsharded: 2000
413
+ save_interval_ephemeral: null
414
+ save_num_checkpoints_to_keep: 1
415
+ save_num_unsharded_checkpoints_to_keep: -1
416
+ save_overwrite: true
417
+ force_save_unsharded: false
418
+ no_pre_train_checkpoint: false
419
+ load_path: checkpoints/floatlm_390M_300B//latest
420
+ load_path_sharded_checkpointer: null
421
+ try_load_latest_save: false
422
+ reset_optimizer_state: false
423
+ reset_trainer_state: false
424
+ sharded_checkpointer: torch_legacy
425
+ new_style_checkpoints: null
426
+ max_duration: 150000
427
+ global_train_batch_size: 1024
428
+ device_train_batch_size: 4
429
+ device_train_microbatch_size: 4
430
+ device_eval_batch_size: 4
431
+ eval_subset_num_batches: -1
432
+ eval_on_load: false
433
+ device_train_grad_accum: 1
434
+ max_grad_norm: 1.0
435
+ max_grad_norm_ratio: null
436
+ precision: amp_bf16
437
+ wandb: null
438
+ speed_monitor:
439
+ window_size: 20
440
+ gpu_flops_available: null
441
+ console_log_interval: 1
442
+ gen1_gc_interval: 1
443
+ compile: null
444
+ distributed_strategy: fsdp
445
+ fsdp:
446
+ use_orig_params: true
447
+ sharding_strategy: _HYBRID_SHARD_ZERO2
448
+ wrapping_strategy: null
449
+ precision: pure
450
+ hybrid_sharding_num_model_replicas: null
451
+ ddp: null
452
+ softmax_auxiliary_loss: false
453
+ auxiliary_loss_multiplier: 0.0001
454
+ time_limit: null
455
+ extra_steps_after_cancel: 10
456
+ early_stopping_factor: null
457
+ save_data_indices: true
458
+ python_profiling: false
459
+ torch_profiling: false
460
+ stop_at: 150010
461
+ stop_after: null
462
+ activation_checkpointing: null
463
+ fused_loss: null
464
+ hf_datasets_cache_dir: null
465
+ module_outputs_save_steps: null
step12000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6346b4dce0cd3e6d7e92783fe32604640ba9fd300e24ae8b4daa91d68124546e
3
+ size 14668
step128000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30e5f44c7d04c6dcc47531bfe04d3e8d39eb3ca5fec0052928a191c56ccc38cd
3
+ size 16524
step14000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4df79143074498107a383ca3dacab3ec3c816db57249b482838bd9f7acd95a2c
3
+ size 14668
step142000-unsharded/config.yaml ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: floatlm_390M_300B
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 1024
7
+ n_heads: 16
8
+ n_kv_heads: null
9
+ clip_qkv: null
10
+ n_layers: 24
11
+ mlp_ratio: 4
12
+ mlp_hidden_size: 5120
13
+ activation_type: swiglu
14
+ block_type: sequential
15
+ block_group_size: 1
16
+ alibi: false
17
+ alibi_bias_max: 8.0
18
+ rope: true
19
+ rope_full_precision: true
20
+ rope_theta: 10000
21
+ flash_attention: false
22
+ attention_dropout: 0.0
23
+ multi_query_attention: false
24
+ attention_layer_norm: false
25
+ residual_dropout: 0.0
26
+ embedding_dropout: 0.0
27
+ embedding_layer_norm: false
28
+ layer_norm_type: rms
29
+ layer_norm_with_affine: true
30
+ layer_norm_eps: 1.0e-05
31
+ attention_layer_norm_with_affine: false
32
+ max_sequence_length: 2048
33
+ include_bias: false
34
+ bias_for_layer_norm: false
35
+ scale_logits: false
36
+ vocab_size: 50277
37
+ embedding_size: 50304
38
+ weight_tying: false
39
+ eos_token_id: 50277
40
+ pad_token_id: 1
41
+ init_device: meta
42
+ init_fn: mitchell
43
+ init_std: 0.02
44
+ init_cutoff_factor: null
45
+ precision: amp_bf16
46
+ scale_emb_init: false
47
+ emb_init_std: null
48
+ norm_after: false
49
+ linear_type: floatlm
50
+ num_trilm_matrix_scales: 0
51
+ optimizer:
52
+ name: adamw
53
+ learning_rate: 0.0003
54
+ weight_decay: 0.1
55
+ betas:
56
+ - 0.9
57
+ - 0.95
58
+ eps: 1.0e-05
59
+ no_decay_norm_and_bias: null
60
+ selective_updates: false
61
+ decay_norm_and_bias: false
62
+ decay_embeddings: false
63
+ metrics_log_interval: 10
64
+ record_update_metrics: false
65
+ scheduler:
66
+ name: cosine_with_warmup
67
+ units: steps
68
+ t_warmup: 375
69
+ t_max: null
70
+ alpha_f: 0.1
71
+ grad_clip_warmup_steps: null
72
+ grad_clip_warmup_factor: null
73
+ warmup_min_lr: null
74
+ remove_weight_decay_in_second_half: false
75
+ data:
76
+ paths:
77
+ - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
78
+ - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
79
+ - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
80
+ - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
81
+ - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
82
+ - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
83
+ - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
84
+ - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
85
+ - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
86
+ - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
87
+ - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
88
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
89
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
90
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
91
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
92
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
93
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
94
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
95
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
96
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
97
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
98
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
99
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
100
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
101
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
102
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
103
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
104
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
105
+ - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
106
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
107
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
108
+ - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
109
+ - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
110
+ - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
111
+ - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
112
+ - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
113
+ - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
114
+ - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
115
+ - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
116
+ - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
117
+ - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
118
+ - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
119
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
120
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
121
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
122
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
123
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
124
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
125
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
126
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
127
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
128
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
129
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
130
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
131
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
132
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
133
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
134
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
135
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
136
+ - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
137
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
138
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
139
+ - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
140
+ - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
141
+ - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
142
+ - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
143
+ - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
144
+ - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
145
+ - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
146
+ - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
147
+ - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
148
+ - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
149
+ - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
150
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
151
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
152
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
153
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
154
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
155
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
156
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
157
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
158
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
159
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
160
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
161
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
162
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
163
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
164
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
165
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
166
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
167
+ - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
168
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
169
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
170
+ - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
171
+ - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
172
+ - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
173
+ - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
174
+ - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
175
+ - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
176
+ - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
177
+ - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
178
+ - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
179
+ - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
180
+ - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
181
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
182
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
183
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
184
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
185
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
186
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
187
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
188
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
189
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
190
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
191
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
192
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
193
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
194
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
195
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
196
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
197
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
198
+ - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
199
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
200
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
201
+ - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
202
+ - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
203
+ - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
204
+ - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
205
+ - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
206
+ - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
207
+ - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
208
+ - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
209
+ - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
210
+ - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
211
+ - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
212
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
213
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
214
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
215
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
216
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
217
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
218
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
219
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
220
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
221
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
222
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
223
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
224
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
225
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
226
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
227
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
228
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
229
+ - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
230
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
231
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
232
+ - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
233
+ - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
234
+ - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
235
+ - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
236
+ - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
237
+ - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
238
+ - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
239
+ - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
240
+ - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
241
+ - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
242
+ - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
243
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
244
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
245
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
246
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
247
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
248
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
249
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
250
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
251
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
252
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
253
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
254
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
255
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
256
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
257
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
258
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
259
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
260
+ - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
261
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
262
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
263
+ - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
264
+ - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
265
+ - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
266
+ - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
267
+ - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
268
+ - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
269
+ - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
270
+ - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
271
+ - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
272
+ - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
273
+ - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
274
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
275
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
276
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
277
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
278
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
279
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
280
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
281
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
282
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
283
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
284
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
285
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
286
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
287
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
288
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
289
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
290
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
291
+ - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
292
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
293
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
294
+ - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
295
+ - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
296
+ - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
297
+ - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
298
+ - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
299
+ - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
300
+ - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
301
+ - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
302
+ - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
303
+ - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
304
+ - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
305
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
306
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
307
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
308
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
309
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
310
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
311
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
312
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
313
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
314
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
315
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
316
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
317
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
318
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
319
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
320
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
321
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
322
+ - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
323
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
324
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
325
+ - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
326
+ - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
327
+ - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
328
+ - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
329
+ - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
330
+ - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
331
+ - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
332
+ - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
333
+ - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
334
+ - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
335
+ - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
336
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
337
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
338
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
339
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
340
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
341
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
342
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
343
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
344
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
345
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
346
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
347
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
348
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
349
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
350
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
351
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
352
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
353
+ - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
354
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
355
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
356
+ - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
357
+ - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
358
+ - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
359
+ - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
360
+ - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
361
+ - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
362
+ - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
363
+ - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
364
+ - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
365
+ - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
366
+ - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
367
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
368
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
369
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
370
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
371
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
372
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
373
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
374
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
375
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
376
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
377
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
378
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
379
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
380
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
381
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
382
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
383
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
384
+ - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
385
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
386
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
387
+ memmap_dtype: uint16
388
+ datasets: null
389
+ label_mask_paths: null
390
+ pad_direction: right
391
+ generate_attention_mask: false
392
+ generate_doc_lengths: false
393
+ num_workers: 0
394
+ drop_last: true
395
+ pin_memory: true
396
+ prefetch_factor: 16
397
+ persistent_workers: true
398
+ timeout: 0
399
+ seed: null
400
+ instance_filter: null
401
+ restore_dataloader: true
402
+ fast_forward_batches: null
403
+ evaluators: []
404
+ eval_interval: 500
405
+ tokenizer:
406
+ identifier: ../spectra_tokenizer/tokenizer.json
407
+ truncate_direction: right
408
+ save_folder: checkpoints/floatlm_390M_300B/
409
+ remote_save_folder: null
410
+ canceled_check_interval: 50
411
+ save_interval: 500
412
+ save_interval_unsharded: 2000
413
+ save_interval_ephemeral: null
414
+ save_num_checkpoints_to_keep: 1
415
+ save_num_unsharded_checkpoints_to_keep: -1
416
+ save_overwrite: true
417
+ force_save_unsharded: false
418
+ no_pre_train_checkpoint: false
419
+ load_path: checkpoints/floatlm_390M_300B//latest
420
+ load_path_sharded_checkpointer: null
421
+ try_load_latest_save: false
422
+ reset_optimizer_state: false
423
+ reset_trainer_state: false
424
+ sharded_checkpointer: torch_legacy
425
+ new_style_checkpoints: null
426
+ max_duration: 150000
427
+ global_train_batch_size: 1024
428
+ device_train_batch_size: 4
429
+ device_train_microbatch_size: 4
430
+ device_eval_batch_size: 4
431
+ eval_subset_num_batches: -1
432
+ eval_on_load: false
433
+ device_train_grad_accum: 1
434
+ max_grad_norm: 1.0
435
+ max_grad_norm_ratio: null
436
+ precision: amp_bf16
437
+ wandb: null
438
+ speed_monitor:
439
+ window_size: 20
440
+ gpu_flops_available: null
441
+ console_log_interval: 1
442
+ gen1_gc_interval: 1
443
+ compile: null
444
+ distributed_strategy: fsdp
445
+ fsdp:
446
+ use_orig_params: true
447
+ sharding_strategy: _HYBRID_SHARD_ZERO2
448
+ wrapping_strategy: null
449
+ precision: pure
450
+ hybrid_sharding_num_model_replicas: null
451
+ ddp: null
452
+ softmax_auxiliary_loss: false
453
+ auxiliary_loss_multiplier: 0.0001
454
+ time_limit: null
455
+ extra_steps_after_cancel: 10
456
+ early_stopping_factor: null
457
+ save_data_indices: true
458
+ python_profiling: false
459
+ torch_profiling: false
460
+ stop_at: 150010
461
+ stop_after: null
462
+ activation_checkpointing: null
463
+ fused_loss: null
464
+ hf_datasets_cache_dir: null
465
+ module_outputs_save_steps: null
step144000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6708346ee4ab49c24da51f8561083c015212ca047e944f04a5d08b9551f3000
3
+ size 16716
step146000-unsharded/config.yaml ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: floatlm_390M_300B
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 1024
7
+ n_heads: 16
8
+ n_kv_heads: null
9
+ clip_qkv: null
10
+ n_layers: 24
11
+ mlp_ratio: 4
12
+ mlp_hidden_size: 5120
13
+ activation_type: swiglu
14
+ block_type: sequential
15
+ block_group_size: 1
16
+ alibi: false
17
+ alibi_bias_max: 8.0
18
+ rope: true
19
+ rope_full_precision: true
20
+ rope_theta: 10000
21
+ flash_attention: false
22
+ attention_dropout: 0.0
23
+ multi_query_attention: false
24
+ attention_layer_norm: false
25
+ residual_dropout: 0.0
26
+ embedding_dropout: 0.0
27
+ embedding_layer_norm: false
28
+ layer_norm_type: rms
29
+ layer_norm_with_affine: true
30
+ layer_norm_eps: 1.0e-05
31
+ attention_layer_norm_with_affine: false
32
+ max_sequence_length: 2048
33
+ include_bias: false
34
+ bias_for_layer_norm: false
35
+ scale_logits: false
36
+ vocab_size: 50277
37
+ embedding_size: 50304
38
+ weight_tying: false
39
+ eos_token_id: 50277
40
+ pad_token_id: 1
41
+ init_device: meta
42
+ init_fn: mitchell
43
+ init_std: 0.02
44
+ init_cutoff_factor: null
45
+ precision: amp_bf16
46
+ scale_emb_init: false
47
+ emb_init_std: null
48
+ norm_after: false
49
+ linear_type: floatlm
50
+ num_trilm_matrix_scales: 0
51
+ optimizer:
52
+ name: adamw
53
+ learning_rate: 0.0003
54
+ weight_decay: 0.1
55
+ betas:
56
+ - 0.9
57
+ - 0.95
58
+ eps: 1.0e-05
59
+ no_decay_norm_and_bias: null
60
+ selective_updates: false
61
+ decay_norm_and_bias: false
62
+ decay_embeddings: false
63
+ metrics_log_interval: 10
64
+ record_update_metrics: false
65
+ scheduler:
66
+ name: cosine_with_warmup
67
+ units: steps
68
+ t_warmup: 375
69
+ t_max: null
70
+ alpha_f: 0.1
71
+ grad_clip_warmup_steps: null
72
+ grad_clip_warmup_factor: null
73
+ warmup_min_lr: null
74
+ remove_weight_decay_in_second_half: false
75
+ data:
76
+ paths:
77
+ - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
78
+ - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
79
+ - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
80
+ - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
81
+ - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
82
+ - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
83
+ - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
84
+ - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
85
+ - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
86
+ - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
87
+ - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
88
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
89
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
90
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
91
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
92
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
93
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
94
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
95
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
96
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
97
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
98
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
99
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
100
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
101
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
102
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
103
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
104
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
105
+ - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
106
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
107
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
108
+ - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
109
+ - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
110
+ - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
111
+ - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
112
+ - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
113
+ - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
114
+ - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
115
+ - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
116
+ - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
117
+ - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
118
+ - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
119
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
120
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
121
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
122
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
123
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
124
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
125
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
126
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
127
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
128
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
129
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
130
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
131
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
132
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
133
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
134
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
135
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
136
+ - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
137
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
138
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
139
+ - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
140
+ - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
141
+ - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
142
+ - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
143
+ - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
144
+ - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
145
+ - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
146
+ - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
147
+ - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
148
+ - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
149
+ - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
150
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
151
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
152
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
153
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
154
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
155
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
156
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
157
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
158
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
159
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
160
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
161
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
162
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
163
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
164
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
165
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
166
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
167
+ - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
168
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
169
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
170
+ - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
171
+ - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
172
+ - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
173
+ - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
174
+ - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
175
+ - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
176
+ - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
177
+ - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
178
+ - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
179
+ - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
180
+ - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
181
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
182
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
183
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
184
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
185
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
186
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
187
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
188
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
189
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
190
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
191
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
192
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
193
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
194
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
195
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
196
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
197
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
198
+ - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
199
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
200
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
201
+ - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
202
+ - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
203
+ - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
204
+ - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
205
+ - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
206
+ - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
207
+ - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
208
+ - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
209
+ - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
210
+ - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
211
+ - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
212
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
213
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
214
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
215
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
216
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
217
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
218
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
219
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
220
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
221
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
222
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
223
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
224
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
225
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
226
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
227
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
228
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
229
+ - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
230
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
231
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
232
+ - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
233
+ - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
234
+ - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
235
+ - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
236
+ - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
237
+ - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
238
+ - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
239
+ - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
240
+ - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
241
+ - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
242
+ - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
243
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
244
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
245
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
246
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
247
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
248
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
249
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
250
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
251
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
252
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
253
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
254
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
255
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
256
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
257
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
258
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
259
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
260
+ - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
261
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
262
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
263
+ - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
264
+ - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
265
+ - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
266
+ - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
267
+ - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
268
+ - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
269
+ - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
270
+ - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
271
+ - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
272
+ - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
273
+ - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
274
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
275
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
276
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
277
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
278
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
279
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
280
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
281
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
282
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
283
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
284
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
285
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
286
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
287
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
288
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
289
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
290
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
291
+ - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
292
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
293
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
294
+ - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
295
+ - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
296
+ - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
297
+ - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
298
+ - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
299
+ - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
300
+ - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
301
+ - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
302
+ - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
303
+ - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
304
+ - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
305
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
306
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
307
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
308
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
309
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
310
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
311
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
312
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
313
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
314
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
315
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
316
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
317
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
318
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
319
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
320
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
321
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
322
+ - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
323
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
324
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
325
+ - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
326
+ - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
327
+ - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
328
+ - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
329
+ - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
330
+ - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
331
+ - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
332
+ - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
333
+ - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
334
+ - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
335
+ - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
336
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
337
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
338
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
339
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
340
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
341
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
342
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
343
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
344
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
345
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
346
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
347
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
348
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
349
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
350
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
351
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
352
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
353
+ - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
354
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
355
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
356
+ - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
357
+ - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
358
+ - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
359
+ - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
360
+ - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
361
+ - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
362
+ - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
363
+ - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
364
+ - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
365
+ - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
366
+ - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
367
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
368
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
369
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
370
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
371
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
372
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
373
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
374
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
375
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
376
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
377
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
378
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
379
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
380
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
381
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
382
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
383
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
384
+ - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
385
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
386
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
387
+ memmap_dtype: uint16
388
+ datasets: null
389
+ label_mask_paths: null
390
+ pad_direction: right
391
+ generate_attention_mask: false
392
+ generate_doc_lengths: false
393
+ num_workers: 0
394
+ drop_last: true
395
+ pin_memory: true
396
+ prefetch_factor: 16
397
+ persistent_workers: true
398
+ timeout: 0
399
+ seed: null
400
+ instance_filter: null
401
+ restore_dataloader: true
402
+ fast_forward_batches: null
403
+ evaluators: []
404
+ eval_interval: 500
405
+ tokenizer:
406
+ identifier: ../spectra_tokenizer/tokenizer.json
407
+ truncate_direction: right
408
+ save_folder: checkpoints/floatlm_390M_300B/
409
+ remote_save_folder: null
410
+ canceled_check_interval: 50
411
+ save_interval: 500
412
+ save_interval_unsharded: 2000
413
+ save_interval_ephemeral: null
414
+ save_num_checkpoints_to_keep: 1
415
+ save_num_unsharded_checkpoints_to_keep: -1
416
+ save_overwrite: true
417
+ force_save_unsharded: false
418
+ no_pre_train_checkpoint: false
419
+ load_path: checkpoints/floatlm_390M_300B//latest
420
+ load_path_sharded_checkpointer: null
421
+ try_load_latest_save: false
422
+ reset_optimizer_state: false
423
+ reset_trainer_state: false
424
+ sharded_checkpointer: torch_legacy
425
+ new_style_checkpoints: null
426
+ max_duration: 150000
427
+ global_train_batch_size: 1024
428
+ device_train_batch_size: 4
429
+ device_train_microbatch_size: 4
430
+ device_eval_batch_size: 4
431
+ eval_subset_num_batches: -1
432
+ eval_on_load: false
433
+ device_train_grad_accum: 1
434
+ max_grad_norm: 1.0
435
+ max_grad_norm_ratio: null
436
+ precision: amp_bf16
437
+ wandb: null
438
+ speed_monitor:
439
+ window_size: 20
440
+ gpu_flops_available: null
441
+ console_log_interval: 1
442
+ gen1_gc_interval: 1
443
+ compile: null
444
+ distributed_strategy: fsdp
445
+ fsdp:
446
+ use_orig_params: true
447
+ sharding_strategy: _HYBRID_SHARD_ZERO2
448
+ wrapping_strategy: null
449
+ precision: pure
450
+ hybrid_sharding_num_model_replicas: null
451
+ ddp: null
452
+ softmax_auxiliary_loss: false
453
+ auxiliary_loss_multiplier: 0.0001
454
+ time_limit: null
455
+ extra_steps_after_cancel: 10
456
+ early_stopping_factor: null
457
+ save_data_indices: true
458
+ python_profiling: false
459
+ torch_profiling: false
460
+ stop_at: 150010
461
+ stop_after: null
462
+ activation_checkpointing: null
463
+ fused_loss: null
464
+ hf_datasets_cache_dir: null
465
+ module_outputs_save_steps: null
step150000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edcc6254951da8a1e9e181b7b5ca8af1b4c47a8855dcf9b6abafbdc9e80434e1
3
+ size 16844
step150000/config.yaml ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: floatlm_390M_300B
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 1024
7
+ n_heads: 16
8
+ n_kv_heads: null
9
+ clip_qkv: null
10
+ n_layers: 24
11
+ mlp_ratio: 4
12
+ mlp_hidden_size: 5120
13
+ activation_type: swiglu
14
+ block_type: sequential
15
+ block_group_size: 1
16
+ alibi: false
17
+ alibi_bias_max: 8.0
18
+ rope: true
19
+ rope_full_precision: true
20
+ rope_theta: 10000
21
+ flash_attention: false
22
+ attention_dropout: 0.0
23
+ multi_query_attention: false
24
+ attention_layer_norm: false
25
+ residual_dropout: 0.0
26
+ embedding_dropout: 0.0
27
+ embedding_layer_norm: false
28
+ layer_norm_type: rms
29
+ layer_norm_with_affine: true
30
+ layer_norm_eps: 1.0e-05
31
+ attention_layer_norm_with_affine: false
32
+ max_sequence_length: 2048
33
+ include_bias: false
34
+ bias_for_layer_norm: false
35
+ scale_logits: false
36
+ vocab_size: 50277
37
+ embedding_size: 50304
38
+ weight_tying: false
39
+ eos_token_id: 50277
40
+ pad_token_id: 1
41
+ init_device: meta
42
+ init_fn: mitchell
43
+ init_std: 0.02
44
+ init_cutoff_factor: null
45
+ precision: amp_bf16
46
+ scale_emb_init: false
47
+ emb_init_std: null
48
+ norm_after: false
49
+ linear_type: floatlm
50
+ num_trilm_matrix_scales: 0
51
+ optimizer:
52
+ name: adamw
53
+ learning_rate: 0.0003
54
+ weight_decay: 0.1
55
+ betas:
56
+ - 0.9
57
+ - 0.95
58
+ eps: 1.0e-05
59
+ no_decay_norm_and_bias: null
60
+ selective_updates: false
61
+ decay_norm_and_bias: false
62
+ decay_embeddings: false
63
+ metrics_log_interval: 10
64
+ record_update_metrics: false
65
+ scheduler:
66
+ name: cosine_with_warmup
67
+ units: steps
68
+ t_warmup: 375
69
+ t_max: null
70
+ alpha_f: 0.1
71
+ grad_clip_warmup_steps: null
72
+ grad_clip_warmup_factor: null
73
+ warmup_min_lr: null
74
+ remove_weight_decay_in_second_half: false
75
+ data:
76
+ paths:
77
+ - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
78
+ - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
79
+ - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
80
+ - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
81
+ - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
82
+ - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
83
+ - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
84
+ - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
85
+ - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
86
+ - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
87
+ - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
88
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
89
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
90
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
91
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
92
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
93
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
94
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
95
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
96
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
97
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
98
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
99
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
100
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
101
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
102
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
103
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
104
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
105
+ - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
106
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
107
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
108
+ - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
109
+ - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
110
+ - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
111
+ - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
112
+ - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
113
+ - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
114
+ - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
115
+ - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
116
+ - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
117
+ - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
118
+ - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
119
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
120
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
121
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
122
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
123
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
124
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
125
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
126
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
127
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
128
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
129
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
130
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
131
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
132
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
133
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
134
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
135
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
136
+ - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
137
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
138
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
139
+ - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
140
+ - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
141
+ - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
142
+ - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
143
+ - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
144
+ - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
145
+ - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
146
+ - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
147
+ - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
148
+ - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
149
+ - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
150
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
151
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
152
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
153
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
154
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
155
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
156
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
157
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
158
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
159
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
160
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
161
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
162
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
163
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
164
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
165
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
166
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
167
+ - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
168
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
169
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
170
+ - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
171
+ - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
172
+ - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
173
+ - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
174
+ - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
175
+ - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
176
+ - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
177
+ - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
178
+ - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
179
+ - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
180
+ - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
181
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
182
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
183
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
184
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
185
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
186
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
187
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
188
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
189
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
190
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
191
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
192
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
193
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
194
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
195
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
196
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
197
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
198
+ - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
199
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
200
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
201
+ - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
202
+ - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
203
+ - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
204
+ - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
205
+ - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
206
+ - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
207
+ - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
208
+ - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
209
+ - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
210
+ - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
211
+ - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
212
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
213
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
214
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
215
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
216
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
217
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
218
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
219
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
220
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
221
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
222
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
223
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
224
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
225
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
226
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
227
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
228
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
229
+ - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
230
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
231
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
232
+ - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
233
+ - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
234
+ - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
235
+ - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
236
+ - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
237
+ - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
238
+ - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
239
+ - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
240
+ - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
241
+ - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
242
+ - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
243
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
244
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
245
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
246
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
247
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
248
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
249
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
250
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
251
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
252
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
253
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
254
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
255
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
256
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
257
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
258
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
259
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
260
+ - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
261
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
262
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
263
+ - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
264
+ - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
265
+ - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
266
+ - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
267
+ - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
268
+ - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
269
+ - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
270
+ - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
271
+ - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
272
+ - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
273
+ - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
274
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
275
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
276
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
277
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
278
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
279
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
280
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
281
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
282
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
283
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
284
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
285
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
286
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
287
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
288
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
289
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
290
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
291
+ - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
292
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
293
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
294
+ - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
295
+ - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
296
+ - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
297
+ - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
298
+ - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
299
+ - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
300
+ - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
301
+ - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
302
+ - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
303
+ - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
304
+ - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
305
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
306
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
307
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
308
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
309
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
310
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
311
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
312
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
313
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
314
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
315
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
316
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
317
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
318
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
319
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
320
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
321
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
322
+ - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
323
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
324
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
325
+ - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
326
+ - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
327
+ - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
328
+ - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
329
+ - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
330
+ - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
331
+ - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
332
+ - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
333
+ - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
334
+ - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
335
+ - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
336
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
337
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
338
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
339
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
340
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
341
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
342
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
343
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
344
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
345
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
346
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
347
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
348
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
349
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
350
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
351
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
352
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
353
+ - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
354
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
355
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
356
+ - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
357
+ - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
358
+ - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
359
+ - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
360
+ - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
361
+ - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
362
+ - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
363
+ - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
364
+ - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
365
+ - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
366
+ - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
367
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
368
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
369
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
370
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
371
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
372
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
373
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
374
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
375
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
376
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
377
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
378
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
379
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
380
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
381
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
382
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
383
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
384
+ - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
385
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
386
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
387
+ memmap_dtype: uint16
388
+ datasets: null
389
+ label_mask_paths: null
390
+ pad_direction: right
391
+ generate_attention_mask: false
392
+ generate_doc_lengths: false
393
+ num_workers: 0
394
+ drop_last: true
395
+ pin_memory: true
396
+ prefetch_factor: 16
397
+ persistent_workers: true
398
+ timeout: 0
399
+ seed: null
400
+ instance_filter: null
401
+ restore_dataloader: true
402
+ fast_forward_batches: null
403
+ evaluators: []
404
+ eval_interval: 500
405
+ tokenizer:
406
+ identifier: ../spectra_tokenizer/tokenizer.json
407
+ truncate_direction: right
408
+ save_folder: checkpoints/floatlm_390M_300B/
409
+ remote_save_folder: null
410
+ canceled_check_interval: 50
411
+ save_interval: 500
412
+ save_interval_unsharded: 2000
413
+ save_interval_ephemeral: null
414
+ save_num_checkpoints_to_keep: 1
415
+ save_num_unsharded_checkpoints_to_keep: -1
416
+ save_overwrite: true
417
+ force_save_unsharded: false
418
+ no_pre_train_checkpoint: false
419
+ load_path: checkpoints/floatlm_390M_300B//latest
420
+ load_path_sharded_checkpointer: null
421
+ try_load_latest_save: false
422
+ reset_optimizer_state: false
423
+ reset_trainer_state: false
424
+ sharded_checkpointer: torch_legacy
425
+ new_style_checkpoints: null
426
+ max_duration: 150000
427
+ global_train_batch_size: 1024
428
+ device_train_batch_size: 4
429
+ device_train_microbatch_size: 4
430
+ device_eval_batch_size: 4
431
+ eval_subset_num_batches: -1
432
+ eval_on_load: false
433
+ device_train_grad_accum: 1
434
+ max_grad_norm: 1.0
435
+ max_grad_norm_ratio: null
436
+ precision: amp_bf16
437
+ wandb: null
438
+ speed_monitor:
439
+ window_size: 20
440
+ gpu_flops_available: null
441
+ console_log_interval: 1
442
+ gen1_gc_interval: 1
443
+ compile: null
444
+ distributed_strategy: fsdp
445
+ fsdp:
446
+ use_orig_params: true
447
+ sharding_strategy: _HYBRID_SHARD_ZERO2
448
+ wrapping_strategy: null
449
+ precision: pure
450
+ hybrid_sharding_num_model_replicas: null
451
+ ddp: null
452
+ softmax_auxiliary_loss: false
453
+ auxiliary_loss_multiplier: 0.0001
454
+ time_limit: null
455
+ extra_steps_after_cancel: 10
456
+ early_stopping_factor: null
457
+ save_data_indices: true
458
+ python_profiling: false
459
+ torch_profiling: false
460
+ stop_at: 150010
461
+ stop_after: null
462
+ activation_checkpointing: null
463
+ fused_loss: null
464
+ hf_datasets_cache_dir: null
465
+ module_outputs_save_steps: null
step150000/rank4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c766855c714dce1528255d7eb8d707697ed119981dfd8bb1c278971801e619fb
3
+ size 588996398
step150010-unsharded/config.yaml ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: floatlm_390M_300B
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 1024
7
+ n_heads: 16
8
+ n_kv_heads: null
9
+ clip_qkv: null
10
+ n_layers: 24
11
+ mlp_ratio: 4
12
+ mlp_hidden_size: 5120
13
+ activation_type: swiglu
14
+ block_type: sequential
15
+ block_group_size: 1
16
+ alibi: false
17
+ alibi_bias_max: 8.0
18
+ rope: true
19
+ rope_full_precision: true
20
+ rope_theta: 10000
21
+ flash_attention: false
22
+ attention_dropout: 0.0
23
+ multi_query_attention: false
24
+ attention_layer_norm: false
25
+ residual_dropout: 0.0
26
+ embedding_dropout: 0.0
27
+ embedding_layer_norm: false
28
+ layer_norm_type: rms
29
+ layer_norm_with_affine: true
30
+ layer_norm_eps: 1.0e-05
31
+ attention_layer_norm_with_affine: false
32
+ max_sequence_length: 2048
33
+ include_bias: false
34
+ bias_for_layer_norm: false
35
+ scale_logits: false
36
+ vocab_size: 50277
37
+ embedding_size: 50304
38
+ weight_tying: false
39
+ eos_token_id: 50277
40
+ pad_token_id: 1
41
+ init_device: meta
42
+ init_fn: mitchell
43
+ init_std: 0.02
44
+ init_cutoff_factor: null
45
+ precision: amp_bf16
46
+ scale_emb_init: false
47
+ emb_init_std: null
48
+ norm_after: false
49
+ linear_type: floatlm
50
+ num_trilm_matrix_scales: 0
51
+ optimizer:
52
+ name: adamw
53
+ learning_rate: 0.0003
54
+ weight_decay: 0.1
55
+ betas:
56
+ - 0.9
57
+ - 0.95
58
+ eps: 1.0e-05
59
+ no_decay_norm_and_bias: null
60
+ selective_updates: false
61
+ decay_norm_and_bias: false
62
+ decay_embeddings: false
63
+ metrics_log_interval: 10
64
+ record_update_metrics: false
65
+ scheduler:
66
+ name: cosine_with_warmup
67
+ units: steps
68
+ t_warmup: 375
69
+ t_max: null
70
+ alpha_f: 0.1
71
+ grad_clip_warmup_steps: null
72
+ grad_clip_warmup_factor: null
73
+ warmup_min_lr: null
74
+ remove_weight_decay_in_second_half: false
75
+ data:
76
+ paths:
77
+ - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
78
+ - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
79
+ - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
80
+ - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
81
+ - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
82
+ - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
83
+ - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
84
+ - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
85
+ - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
86
+ - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
87
+ - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
88
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
89
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
90
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
91
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
92
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
93
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
94
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
95
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
96
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
97
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
98
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
99
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
100
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
101
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
102
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
103
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
104
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
105
+ - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
106
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
107
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
108
+ - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
109
+ - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
110
+ - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
111
+ - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
112
+ - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
113
+ - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
114
+ - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
115
+ - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
116
+ - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
117
+ - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
118
+ - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
119
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
120
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
121
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
122
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
123
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
124
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
125
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
126
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
127
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
128
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
129
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
130
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
131
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
132
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
133
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
134
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
135
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
136
+ - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
137
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
138
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
139
+ - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
140
+ - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
141
+ - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
142
+ - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
143
+ - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
144
+ - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
145
+ - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
146
+ - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
147
+ - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
148
+ - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
149
+ - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
150
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
151
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
152
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
153
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
154
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
155
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
156
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
157
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
158
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
159
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
160
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
161
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
162
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
163
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
164
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
165
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
166
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
167
+ - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
168
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
169
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
170
+ - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
171
+ - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
172
+ - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
173
+ - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
174
+ - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
175
+ - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
176
+ - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
177
+ - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
178
+ - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
179
+ - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
180
+ - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
181
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
182
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
183
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
184
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
185
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
186
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
187
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
188
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
189
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
190
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
191
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
192
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
193
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
194
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
195
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
196
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
197
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
198
+ - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
199
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
200
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
201
+ - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
202
+ - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
203
+ - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
204
+ - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
205
+ - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
206
+ - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
207
+ - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
208
+ - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
209
+ - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
210
+ - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
211
+ - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
212
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
213
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
214
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
215
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
216
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
217
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
218
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
219
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
220
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
221
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
222
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
223
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
224
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
225
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
226
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
227
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
228
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
229
+ - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
230
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
231
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
232
+ - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
233
+ - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
234
+ - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
235
+ - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
236
+ - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
237
+ - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
238
+ - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
239
+ - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
240
+ - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
241
+ - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
242
+ - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
243
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
244
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
245
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
246
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
247
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
248
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
249
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
250
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
251
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
252
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
253
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
254
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
255
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
256
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
257
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
258
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
259
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
260
+ - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
261
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
262
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
263
+ - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
264
+ - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
265
+ - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
266
+ - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
267
+ - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
268
+ - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
269
+ - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
270
+ - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
271
+ - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
272
+ - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
273
+ - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
274
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
275
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
276
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
277
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
278
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
279
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
280
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
281
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
282
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
283
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
284
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
285
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
286
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
287
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
288
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
289
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
290
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
291
+ - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
292
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
293
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
294
+ - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
295
+ - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
296
+ - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
297
+ - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
298
+ - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
299
+ - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
300
+ - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
301
+ - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
302
+ - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
303
+ - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
304
+ - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
305
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
306
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
307
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
308
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
309
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
310
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
311
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
312
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
313
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
314
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
315
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
316
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
317
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
318
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
319
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
320
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
321
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
322
+ - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
323
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
324
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
325
+ - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
326
+ - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
327
+ - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
328
+ - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
329
+ - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
330
+ - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
331
+ - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
332
+ - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
333
+ - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
334
+ - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
335
+ - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
336
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
337
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
338
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
339
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
340
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
341
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
342
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
343
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
344
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
345
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
346
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
347
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
348
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
349
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
350
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
351
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
352
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
353
+ - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
354
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
355
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
356
+ - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
357
+ - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
358
+ - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
359
+ - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
360
+ - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
361
+ - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
362
+ - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
363
+ - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
364
+ - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
365
+ - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
366
+ - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
367
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
368
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
369
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
370
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
371
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
372
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
373
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
374
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
375
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
376
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
377
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
378
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
379
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
380
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
381
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
382
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
383
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
384
+ - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
385
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
386
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
387
+ memmap_dtype: uint16
388
+ datasets: null
389
+ label_mask_paths: null
390
+ pad_direction: right
391
+ generate_attention_mask: false
392
+ generate_doc_lengths: false
393
+ num_workers: 0
394
+ drop_last: true
395
+ pin_memory: true
396
+ prefetch_factor: 16
397
+ persistent_workers: true
398
+ timeout: 0
399
+ seed: null
400
+ instance_filter: null
401
+ restore_dataloader: true
402
+ fast_forward_batches: null
403
+ evaluators: []
404
+ eval_interval: 500
405
+ tokenizer:
406
+ identifier: ../spectra_tokenizer/tokenizer.json
407
+ truncate_direction: right
408
+ save_folder: checkpoints/floatlm_390M_300B/
409
+ remote_save_folder: null
410
+ canceled_check_interval: 50
411
+ save_interval: 500
412
+ save_interval_unsharded: 2000
413
+ save_interval_ephemeral: null
414
+ save_num_checkpoints_to_keep: 1
415
+ save_num_unsharded_checkpoints_to_keep: -1
416
+ save_overwrite: true
417
+ force_save_unsharded: false
418
+ no_pre_train_checkpoint: false
419
+ load_path: checkpoints/floatlm_390M_300B//latest
420
+ load_path_sharded_checkpointer: null
421
+ try_load_latest_save: false
422
+ reset_optimizer_state: false
423
+ reset_trainer_state: false
424
+ sharded_checkpointer: torch_legacy
425
+ new_style_checkpoints: null
426
+ max_duration: 150000
427
+ global_train_batch_size: 1024
428
+ device_train_batch_size: 4
429
+ device_train_microbatch_size: 4
430
+ device_eval_batch_size: 4
431
+ eval_subset_num_batches: -1
432
+ eval_on_load: false
433
+ device_train_grad_accum: 1
434
+ max_grad_norm: 1.0
435
+ max_grad_norm_ratio: null
436
+ precision: amp_bf16
437
+ wandb: null
438
+ speed_monitor:
439
+ window_size: 20
440
+ gpu_flops_available: null
441
+ console_log_interval: 1
442
+ gen1_gc_interval: 1
443
+ compile: null
444
+ distributed_strategy: fsdp
445
+ fsdp:
446
+ use_orig_params: true
447
+ sharding_strategy: _HYBRID_SHARD_ZERO2
448
+ wrapping_strategy: null
449
+ precision: pure
450
+ hybrid_sharding_num_model_replicas: null
451
+ ddp: null
452
+ softmax_auxiliary_loss: false
453
+ auxiliary_loss_multiplier: 0.0001
454
+ time_limit: null
455
+ extra_steps_after_cancel: 10
456
+ early_stopping_factor: null
457
+ save_data_indices: true
458
+ python_profiling: false
459
+ torch_profiling: false
460
+ stop_at: 150010
461
+ stop_after: null
462
+ activation_checkpointing: null
463
+ fused_loss: null
464
+ hf_datasets_cache_dir: null
465
+ module_outputs_save_steps: null
step16000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a65b1d1df8d32f80182c12dba08a3201f2b660cc87aee47ac3724c50d8059bd8
3
+ size 14732
step2000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9867809cf082cbbe7021073723dfe113cbf8e1dbbb33dc10cbb665460109848
3
+ size 14540
step28000-unsharded/config.yaml ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: floatlm_390M_300B
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 1024
7
+ n_heads: 16
8
+ n_kv_heads: null
9
+ clip_qkv: null
10
+ n_layers: 24
11
+ mlp_ratio: 4
12
+ mlp_hidden_size: 5120
13
+ activation_type: swiglu
14
+ block_type: sequential
15
+ block_group_size: 1
16
+ alibi: false
17
+ alibi_bias_max: 8.0
18
+ rope: true
19
+ rope_full_precision: true
20
+ rope_theta: 10000
21
+ flash_attention: false
22
+ attention_dropout: 0.0
23
+ multi_query_attention: false
24
+ attention_layer_norm: false
25
+ residual_dropout: 0.0
26
+ embedding_dropout: 0.0
27
+ embedding_layer_norm: false
28
+ layer_norm_type: rms
29
+ layer_norm_with_affine: true
30
+ layer_norm_eps: 1.0e-05
31
+ attention_layer_norm_with_affine: false
32
+ max_sequence_length: 2048
33
+ include_bias: false
34
+ bias_for_layer_norm: false
35
+ scale_logits: false
36
+ vocab_size: 50277
37
+ embedding_size: 50304
38
+ weight_tying: false
39
+ eos_token_id: 50277
40
+ pad_token_id: 1
41
+ init_device: meta
42
+ init_fn: mitchell
43
+ init_std: 0.02
44
+ init_cutoff_factor: null
45
+ precision: amp_bf16
46
+ scale_emb_init: false
47
+ emb_init_std: null
48
+ norm_after: false
49
+ linear_type: floatlm
50
+ num_trilm_matrix_scales: 0
51
+ optimizer:
52
+ name: adamw
53
+ learning_rate: 0.0003
54
+ weight_decay: 0.1
55
+ betas:
56
+ - 0.9
57
+ - 0.95
58
+ eps: 1.0e-05
59
+ no_decay_norm_and_bias: null
60
+ selective_updates: false
61
+ decay_norm_and_bias: false
62
+ decay_embeddings: false
63
+ metrics_log_interval: 10
64
+ record_update_metrics: false
65
+ scheduler:
66
+ name: cosine_with_warmup
67
+ units: steps
68
+ t_warmup: 375
69
+ t_max: null
70
+ alpha_f: 0.1
71
+ grad_clip_warmup_steps: null
72
+ grad_clip_warmup_factor: null
73
+ warmup_min_lr: null
74
+ remove_weight_decay_in_second_half: false
75
+ data:
76
+ paths:
77
+ - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
78
+ - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
79
+ - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
80
+ - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
81
+ - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
82
+ - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
83
+ - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
84
+ - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
85
+ - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
86
+ - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
87
+ - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
88
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
89
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
90
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
91
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
92
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
93
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
94
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
95
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
96
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
97
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
98
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
99
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
100
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
101
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
102
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
103
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
104
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
105
+ - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
106
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
107
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
108
+ - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
109
+ - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
110
+ - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
111
+ - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
112
+ - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
113
+ - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
114
+ - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
115
+ - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
116
+ - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
117
+ - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
118
+ - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
119
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
120
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
121
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
122
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
123
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
124
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
125
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
126
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
127
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
128
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
129
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
130
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
131
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
132
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
133
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
134
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
135
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
136
+ - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
137
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
138
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
139
+ - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
140
+ - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
141
+ - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
142
+ - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
143
+ - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
144
+ - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
145
+ - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
146
+ - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
147
+ - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
148
+ - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
149
+ - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
150
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
151
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
152
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
153
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
154
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
155
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
156
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
157
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
158
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
159
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
160
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
161
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
162
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
163
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
164
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
165
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
166
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
167
+ - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
168
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
169
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
170
+ - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
171
+ - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
172
+ - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
173
+ - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
174
+ - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
175
+ - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
176
+ - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
177
+ - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
178
+ - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
179
+ - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
180
+ - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
181
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
182
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
183
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
184
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
185
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
186
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
187
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
188
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
189
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
190
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
191
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
192
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
193
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
194
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
195
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
196
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
197
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
198
+ - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
199
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
200
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
201
+ - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
202
+ - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
203
+ - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
204
+ - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
205
+ - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
206
+ - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
207
+ - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
208
+ - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
209
+ - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
210
+ - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
211
+ - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
212
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
213
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
214
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
215
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
216
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
217
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
218
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
219
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
220
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
221
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
222
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
223
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
224
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
225
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
226
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
227
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
228
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
229
+ - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
230
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
231
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
232
+ - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
233
+ - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
234
+ - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
235
+ - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
236
+ - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
237
+ - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
238
+ - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
239
+ - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
240
+ - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
241
+ - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
242
+ - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
243
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
244
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
245
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
246
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
247
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
248
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
249
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
250
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
251
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
252
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
253
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
254
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
255
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
256
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
257
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
258
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
259
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
260
+ - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
261
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
262
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
263
+ - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
264
+ - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
265
+ - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
266
+ - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
267
+ - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
268
+ - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
269
+ - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
270
+ - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
271
+ - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
272
+ - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
273
+ - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
274
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
275
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
276
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
277
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
278
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
279
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
280
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
281
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
282
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
283
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
284
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
285
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
286
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
287
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
288
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
289
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
290
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
291
+ - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
292
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
293
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
294
+ - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
295
+ - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
296
+ - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
297
+ - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
298
+ - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
299
+ - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
300
+ - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
301
+ - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
302
+ - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
303
+ - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
304
+ - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
305
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
306
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
307
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
308
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
309
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
310
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
311
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
312
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
313
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
314
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
315
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
316
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
317
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
318
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
319
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
320
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
321
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
322
+ - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
323
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
324
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
325
+ - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
326
+ - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
327
+ - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
328
+ - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
329
+ - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
330
+ - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
331
+ - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
332
+ - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
333
+ - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
334
+ - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
335
+ - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
336
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
337
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
338
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
339
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
340
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
341
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
342
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
343
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
344
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
345
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
346
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
347
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
348
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
349
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
350
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
351
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
352
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
353
+ - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
354
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
355
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
356
+ - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
357
+ - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
358
+ - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
359
+ - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
360
+ - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
361
+ - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
362
+ - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
363
+ - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
364
+ - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
365
+ - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
366
+ - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
367
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
368
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
369
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
370
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
371
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
372
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
373
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
374
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
375
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
376
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
377
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
378
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
379
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
380
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
381
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
382
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
383
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
384
+ - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
385
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
386
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
387
+ memmap_dtype: uint16
388
+ datasets: null
389
+ label_mask_paths: null
390
+ pad_direction: right
391
+ generate_attention_mask: false
392
+ generate_doc_lengths: false
393
+ num_workers: 0
394
+ drop_last: true
395
+ pin_memory: true
396
+ prefetch_factor: 16
397
+ persistent_workers: true
398
+ timeout: 0
399
+ seed: null
400
+ instance_filter: null
401
+ restore_dataloader: true
402
+ fast_forward_batches: null
403
+ evaluators: []
404
+ eval_interval: 500
405
+ tokenizer:
406
+ identifier: ../spectra_tokenizer/tokenizer.json
407
+ truncate_direction: right
408
+ save_folder: checkpoints/floatlm_390M_300B/
409
+ remote_save_folder: null
410
+ canceled_check_interval: 50
411
+ save_interval: 500
412
+ save_interval_unsharded: 2000
413
+ save_interval_ephemeral: null
414
+ save_num_checkpoints_to_keep: 1
415
+ save_num_unsharded_checkpoints_to_keep: -1
416
+ save_overwrite: true
417
+ force_save_unsharded: false
418
+ no_pre_train_checkpoint: false
419
+ load_path: checkpoints/floatlm_390M_300B//latest
420
+ load_path_sharded_checkpointer: null
421
+ try_load_latest_save: false
422
+ reset_optimizer_state: false
423
+ reset_trainer_state: false
424
+ sharded_checkpointer: torch_legacy
425
+ new_style_checkpoints: null
426
+ max_duration: 150000
427
+ global_train_batch_size: 1024
428
+ device_train_batch_size: 4
429
+ device_train_microbatch_size: 4
430
+ device_eval_batch_size: 4
431
+ eval_subset_num_batches: -1
432
+ eval_on_load: false
433
+ device_train_grad_accum: 1
434
+ max_grad_norm: 1.0
435
+ max_grad_norm_ratio: null
436
+ precision: amp_bf16
437
+ wandb: null
438
+ speed_monitor:
439
+ window_size: 20
440
+ gpu_flops_available: null
441
+ console_log_interval: 1
442
+ gen1_gc_interval: 1
443
+ compile: null
444
+ distributed_strategy: fsdp
445
+ fsdp:
446
+ use_orig_params: true
447
+ sharding_strategy: _HYBRID_SHARD_ZERO2
448
+ wrapping_strategy: null
449
+ precision: pure
450
+ hybrid_sharding_num_model_replicas: null
451
+ ddp: null
452
+ softmax_auxiliary_loss: false
453
+ auxiliary_loss_multiplier: 0.0001
454
+ time_limit: null
455
+ extra_steps_after_cancel: 10
456
+ early_stopping_factor: null
457
+ save_data_indices: true
458
+ python_profiling: false
459
+ torch_profiling: false
460
+ stop_at: 150010
461
+ stop_after: null
462
+ activation_checkpointing: null
463
+ fused_loss: null
464
+ hf_datasets_cache_dir: null
465
+ module_outputs_save_steps: null
step38000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c3abca5ebea043ffc84d6866e149068248d402ac65365ea299c7aa1690f07bb
3
+ size 15052
step40000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54e101b6bd2492dd3239c6e7a9db3b3510b0425bb66d91ac07be2eacb0f981a2
3
+ size 15116
step44000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b2dabe7c5f49b9845a790026f4fb38af7d72d5ca7f86a1532df678af11bbb83
3
+ size 15180
step46000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46e31b2d24ace2d634dc5d46c6baf40811ac8cec3f5d2e9fede0492d8126eda8
3
+ size 15244
step50000-unsharded/config.yaml ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: floatlm_390M_300B
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 1024
7
+ n_heads: 16
8
+ n_kv_heads: null
9
+ clip_qkv: null
10
+ n_layers: 24
11
+ mlp_ratio: 4
12
+ mlp_hidden_size: 5120
13
+ activation_type: swiglu
14
+ block_type: sequential
15
+ block_group_size: 1
16
+ alibi: false
17
+ alibi_bias_max: 8.0
18
+ rope: true
19
+ rope_full_precision: true
20
+ rope_theta: 10000
21
+ flash_attention: false
22
+ attention_dropout: 0.0
23
+ multi_query_attention: false
24
+ attention_layer_norm: false
25
+ residual_dropout: 0.0
26
+ embedding_dropout: 0.0
27
+ embedding_layer_norm: false
28
+ layer_norm_type: rms
29
+ layer_norm_with_affine: true
30
+ layer_norm_eps: 1.0e-05
31
+ attention_layer_norm_with_affine: false
32
+ max_sequence_length: 2048
33
+ include_bias: false
34
+ bias_for_layer_norm: false
35
+ scale_logits: false
36
+ vocab_size: 50277
37
+ embedding_size: 50304
38
+ weight_tying: false
39
+ eos_token_id: 50277
40
+ pad_token_id: 1
41
+ init_device: meta
42
+ init_fn: mitchell
43
+ init_std: 0.02
44
+ init_cutoff_factor: null
45
+ precision: amp_bf16
46
+ scale_emb_init: false
47
+ emb_init_std: null
48
+ norm_after: false
49
+ linear_type: floatlm
50
+ num_trilm_matrix_scales: 0
51
+ optimizer:
52
+ name: adamw
53
+ learning_rate: 0.0003
54
+ weight_decay: 0.1
55
+ betas:
56
+ - 0.9
57
+ - 0.95
58
+ eps: 1.0e-05
59
+ no_decay_norm_and_bias: null
60
+ selective_updates: false
61
+ decay_norm_and_bias: false
62
+ decay_embeddings: false
63
+ metrics_log_interval: 10
64
+ record_update_metrics: false
65
+ scheduler:
66
+ name: cosine_with_warmup
67
+ units: steps
68
+ t_warmup: 375
69
+ t_max: null
70
+ alpha_f: 0.1
71
+ grad_clip_warmup_steps: null
72
+ grad_clip_warmup_factor: null
73
+ warmup_min_lr: null
74
+ remove_weight_decay_in_second_half: false
75
+ data:
76
+ paths:
77
+ - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
78
+ - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
79
+ - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
80
+ - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
81
+ - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
82
+ - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
83
+ - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
84
+ - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
85
+ - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
86
+ - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
87
+ - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
88
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
89
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
90
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
91
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
92
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
93
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
94
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
95
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
96
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
97
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
98
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
99
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
100
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
101
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
102
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
103
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
104
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
105
+ - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
106
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
107
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
108
+ - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
109
+ - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
110
+ - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
111
+ - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
112
+ - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
113
+ - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
114
+ - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
115
+ - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
116
+ - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
117
+ - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
118
+ - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
119
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
120
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
121
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
122
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
123
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
124
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
125
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
126
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
127
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
128
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
129
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
130
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
131
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
132
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
133
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
134
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
135
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
136
+ - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
137
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
138
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
139
+ - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
140
+ - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
141
+ - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
142
+ - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
143
+ - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
144
+ - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
145
+ - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
146
+ - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
147
+ - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
148
+ - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
149
+ - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
150
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
151
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
152
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
153
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
154
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
155
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
156
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
157
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
158
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
159
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
160
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
161
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
162
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
163
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
164
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
165
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
166
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
167
+ - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
168
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
169
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
170
+ - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
171
+ - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
172
+ - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
173
+ - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
174
+ - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
175
+ - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
176
+ - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
177
+ - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
178
+ - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
179
+ - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
180
+ - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
181
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
182
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
183
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
184
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
185
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
186
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
187
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
188
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
189
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
190
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
191
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
192
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
193
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
194
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
195
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
196
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
197
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
198
+ - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
199
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
200
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
201
+ - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
202
+ - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
203
+ - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
204
+ - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
205
+ - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
206
+ - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
207
+ - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
208
+ - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
209
+ - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
210
+ - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
211
+ - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
212
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
213
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
214
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
215
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
216
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
217
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
218
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
219
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
220
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
221
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
222
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
223
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
224
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
225
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
226
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
227
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
228
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
229
+ - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
230
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
231
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
232
+ - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
233
+ - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
234
+ - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
235
+ - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
236
+ - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
237
+ - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
238
+ - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
239
+ - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
240
+ - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
241
+ - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
242
+ - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
243
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
244
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
245
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
246
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
247
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
248
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
249
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
250
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
251
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
252
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
253
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
254
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
255
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
256
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
257
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
258
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
259
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
260
+ - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
261
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
262
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
263
+ - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
264
+ - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
265
+ - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
266
+ - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
267
+ - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
268
+ - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
269
+ - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
270
+ - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
271
+ - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
272
+ - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
273
+ - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
274
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
275
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
276
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
277
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
278
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
279
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
280
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
281
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
282
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
283
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
284
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
285
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
286
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
287
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
288
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
289
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
290
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
291
+ - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
292
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
293
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
294
+ - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
295
+ - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
296
+ - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
297
+ - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
298
+ - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
299
+ - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
300
+ - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
301
+ - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
302
+ - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
303
+ - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
304
+ - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
305
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
306
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
307
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
308
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
309
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
310
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
311
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
312
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
313
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
314
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
315
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
316
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
317
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
318
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
319
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
320
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
321
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
322
+ - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
323
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
324
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
325
+ - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
326
+ - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
327
+ - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
328
+ - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
329
+ - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
330
+ - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
331
+ - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
332
+ - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
333
+ - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
334
+ - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
335
+ - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
336
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
337
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
338
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
339
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
340
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
341
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
342
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
343
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
344
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
345
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
346
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
347
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
348
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
349
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
350
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
351
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
352
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
353
+ - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
354
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
355
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
356
+ - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
357
+ - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
358
+ - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
359
+ - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
360
+ - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
361
+ - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
362
+ - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
363
+ - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
364
+ - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
365
+ - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
366
+ - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
367
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
368
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
369
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
370
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
371
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
372
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
373
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
374
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
375
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
376
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
377
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
378
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
379
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
380
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
381
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
382
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
383
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
384
+ - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
385
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
386
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
387
+ memmap_dtype: uint16
388
+ datasets: null
389
+ label_mask_paths: null
390
+ pad_direction: right
391
+ generate_attention_mask: false
392
+ generate_doc_lengths: false
393
+ num_workers: 0
394
+ drop_last: true
395
+ pin_memory: true
396
+ prefetch_factor: 16
397
+ persistent_workers: true
398
+ timeout: 0
399
+ seed: null
400
+ instance_filter: null
401
+ restore_dataloader: true
402
+ fast_forward_batches: null
403
+ evaluators: []
404
+ eval_interval: 500
405
+ tokenizer:
406
+ identifier: ../spectra_tokenizer/tokenizer.json
407
+ truncate_direction: right
408
+ save_folder: checkpoints/floatlm_390M_300B/
409
+ remote_save_folder: null
410
+ canceled_check_interval: 50
411
+ save_interval: 500
412
+ save_interval_unsharded: 2000
413
+ save_interval_ephemeral: null
414
+ save_num_checkpoints_to_keep: 1
415
+ save_num_unsharded_checkpoints_to_keep: -1
416
+ save_overwrite: true
417
+ force_save_unsharded: false
418
+ no_pre_train_checkpoint: false
419
+ load_path: checkpoints/floatlm_390M_300B//latest
420
+ load_path_sharded_checkpointer: null
421
+ try_load_latest_save: false
422
+ reset_optimizer_state: false
423
+ reset_trainer_state: false
424
+ sharded_checkpointer: torch_legacy
425
+ new_style_checkpoints: null
426
+ max_duration: 150000
427
+ global_train_batch_size: 1024
428
+ device_train_batch_size: 4
429
+ device_train_microbatch_size: 4
430
+ device_eval_batch_size: 4
431
+ eval_subset_num_batches: -1
432
+ eval_on_load: false
433
+ device_train_grad_accum: 1
434
+ max_grad_norm: 1.0
435
+ max_grad_norm_ratio: null
436
+ precision: amp_bf16
437
+ wandb: null
438
+ speed_monitor:
439
+ window_size: 20
440
+ gpu_flops_available: null
441
+ console_log_interval: 1
442
+ gen1_gc_interval: 1
443
+ compile: null
444
+ distributed_strategy: fsdp
445
+ fsdp:
446
+ use_orig_params: true
447
+ sharding_strategy: _HYBRID_SHARD_ZERO2
448
+ wrapping_strategy: null
449
+ precision: pure
450
+ hybrid_sharding_num_model_replicas: null
451
+ ddp: null
452
+ softmax_auxiliary_loss: false
453
+ auxiliary_loss_multiplier: 0.0001
454
+ time_limit: null
455
+ extra_steps_after_cancel: 10
456
+ early_stopping_factor: null
457
+ save_data_indices: true
458
+ python_profiling: false
459
+ torch_profiling: false
460
+ stop_at: 150010
461
+ stop_after: null
462
+ activation_checkpointing: null
463
+ fused_loss: null
464
+ hf_datasets_cache_dir: null
465
+ module_outputs_save_steps: null
step52000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:211ada4a67373131f95f4000db22b8f0a4f7072bd803ad1b5429dee6d61f5745
3
+ size 15308
step54000-unsharded/config.yaml ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: floatlm_390M_300B
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 1024
7
+ n_heads: 16
8
+ n_kv_heads: null
9
+ clip_qkv: null
10
+ n_layers: 24
11
+ mlp_ratio: 4
12
+ mlp_hidden_size: 5120
13
+ activation_type: swiglu
14
+ block_type: sequential
15
+ block_group_size: 1
16
+ alibi: false
17
+ alibi_bias_max: 8.0
18
+ rope: true
19
+ rope_full_precision: true
20
+ rope_theta: 10000
21
+ flash_attention: false
22
+ attention_dropout: 0.0
23
+ multi_query_attention: false
24
+ attention_layer_norm: false
25
+ residual_dropout: 0.0
26
+ embedding_dropout: 0.0
27
+ embedding_layer_norm: false
28
+ layer_norm_type: rms
29
+ layer_norm_with_affine: true
30
+ layer_norm_eps: 1.0e-05
31
+ attention_layer_norm_with_affine: false
32
+ max_sequence_length: 2048
33
+ include_bias: false
34
+ bias_for_layer_norm: false
35
+ scale_logits: false
36
+ vocab_size: 50277
37
+ embedding_size: 50304
38
+ weight_tying: false
39
+ eos_token_id: 50277
40
+ pad_token_id: 1
41
+ init_device: meta
42
+ init_fn: mitchell
43
+ init_std: 0.02
44
+ init_cutoff_factor: null
45
+ precision: amp_bf16
46
+ scale_emb_init: false
47
+ emb_init_std: null
48
+ norm_after: false
49
+ linear_type: floatlm
50
+ num_trilm_matrix_scales: 0
51
+ optimizer:
52
+ name: adamw
53
+ learning_rate: 0.0003
54
+ weight_decay: 0.1
55
+ betas:
56
+ - 0.9
57
+ - 0.95
58
+ eps: 1.0e-05
59
+ no_decay_norm_and_bias: null
60
+ selective_updates: false
61
+ decay_norm_and_bias: false
62
+ decay_embeddings: false
63
+ metrics_log_interval: 10
64
+ record_update_metrics: false
65
+ scheduler:
66
+ name: cosine_with_warmup
67
+ units: steps
68
+ t_warmup: 375
69
+ t_max: null
70
+ alpha_f: 0.1
71
+ grad_clip_warmup_steps: null
72
+ grad_clip_warmup_factor: null
73
+ warmup_min_lr: null
74
+ remove_weight_decay_in_second_half: false
75
+ data:
76
+ paths:
77
+ - ../slimp/train/all_combined/combined_1/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
78
+ - ../slimp/train/all_combined/combined_1/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
79
+ - ../slimp/train/all_combined/combined_1/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
80
+ - ../slimp/train/all_combined/combined_1/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
81
+ - ../slimp/train/all_combined/combined_1/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
82
+ - ../slimp/train/all_combined/combined_1/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
83
+ - ../slimp/train/all_combined/combined_1/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
84
+ - ../slimp/train/all_combined/combined_1/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
85
+ - ../slimp/train/all_combined/combined_1/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
86
+ - ../slimp/train/all_combined/combined_1/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
87
+ - ../slimp/train/all_combined/combined_1/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
88
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
89
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
90
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
91
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
92
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
93
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
94
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
95
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
96
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
97
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
98
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
99
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
100
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
101
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
102
+ - ../slimp/train/all_combined/combined_1/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
103
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
104
+ - ../slimp/train/all_combined/combined_1/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
105
+ - ../slimp/train/all_combined/combined_1/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
106
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
107
+ - ../slimp/train/all_combined/combined_1/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
108
+ - ../slimp/train/all_combined/combined_2/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
109
+ - ../slimp/train/all_combined/combined_2/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
110
+ - ../slimp/train/all_combined/combined_2/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
111
+ - ../slimp/train/all_combined/combined_2/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
112
+ - ../slimp/train/all_combined/combined_2/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
113
+ - ../slimp/train/all_combined/combined_2/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
114
+ - ../slimp/train/all_combined/combined_2/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
115
+ - ../slimp/train/all_combined/combined_2/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
116
+ - ../slimp/train/all_combined/combined_2/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
117
+ - ../slimp/train/all_combined/combined_2/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
118
+ - ../slimp/train/all_combined/combined_2/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
119
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
120
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
121
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
122
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
123
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
124
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
125
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
126
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
127
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
128
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
129
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
130
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
131
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
132
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
133
+ - ../slimp/train/all_combined/combined_2/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
134
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
135
+ - ../slimp/train/all_combined/combined_2/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
136
+ - ../slimp/train/all_combined/combined_2/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
137
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
138
+ - ../slimp/train/all_combined/combined_2/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
139
+ - ../slimp/train/all_combined/combined_3/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
140
+ - ../slimp/train/all_combined/combined_3/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
141
+ - ../slimp/train/all_combined/combined_3/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
142
+ - ../slimp/train/all_combined/combined_3/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
143
+ - ../slimp/train/all_combined/combined_3/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
144
+ - ../slimp/train/all_combined/combined_3/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
145
+ - ../slimp/train/all_combined/combined_3/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
146
+ - ../slimp/train/all_combined/combined_3/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
147
+ - ../slimp/train/all_combined/combined_3/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
148
+ - ../slimp/train/all_combined/combined_3/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
149
+ - ../slimp/train/all_combined/combined_3/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
150
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
151
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
152
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
153
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
154
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
155
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
156
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
157
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
158
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
159
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
160
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
161
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
162
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
163
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
164
+ - ../slimp/train/all_combined/combined_3/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
165
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
166
+ - ../slimp/train/all_combined/combined_3/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
167
+ - ../slimp/train/all_combined/combined_3/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
168
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
169
+ - ../slimp/train/all_combined/combined_3/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
170
+ - ../slimp/train/all_combined/combined_4/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
171
+ - ../slimp/train/all_combined/combined_4/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
172
+ - ../slimp/train/all_combined/combined_4/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
173
+ - ../slimp/train/all_combined/combined_4/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
174
+ - ../slimp/train/all_combined/combined_4/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
175
+ - ../slimp/train/all_combined/combined_4/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
176
+ - ../slimp/train/all_combined/combined_4/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
177
+ - ../slimp/train/all_combined/combined_4/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
178
+ - ../slimp/train/all_combined/combined_4/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
179
+ - ../slimp/train/all_combined/combined_4/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
180
+ - ../slimp/train/all_combined/combined_4/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
181
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
182
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
183
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
184
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
185
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
186
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
187
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
188
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
189
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
190
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
191
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
192
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
193
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
194
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
195
+ - ../slimp/train/all_combined/combined_4/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
196
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
197
+ - ../slimp/train/all_combined/combined_4/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
198
+ - ../slimp/train/all_combined/combined_4/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
199
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
200
+ - ../slimp/train/all_combined/combined_4/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
201
+ - ../slimp/train/all_combined/combined_5/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
202
+ - ../slimp/train/all_combined/combined_5/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
203
+ - ../slimp/train/all_combined/combined_5/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
204
+ - ../slimp/train/all_combined/combined_5/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
205
+ - ../slimp/train/all_combined/combined_5/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
206
+ - ../slimp/train/all_combined/combined_5/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
207
+ - ../slimp/train/all_combined/combined_5/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
208
+ - ../slimp/train/all_combined/combined_5/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
209
+ - ../slimp/train/all_combined/combined_5/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
210
+ - ../slimp/train/all_combined/combined_5/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
211
+ - ../slimp/train/all_combined/combined_5/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
212
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
213
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
214
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
215
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
216
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
217
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
218
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
219
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
220
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
221
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
222
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
223
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
224
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
225
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
226
+ - ../slimp/train/all_combined/combined_5/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
227
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
228
+ - ../slimp/train/all_combined/combined_5/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
229
+ - ../slimp/train/all_combined/combined_5/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
230
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
231
+ - ../slimp/train/all_combined/combined_5/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
232
+ - ../slimp/train/all_combined/combined_6/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
233
+ - ../slimp/train/all_combined/combined_6/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
234
+ - ../slimp/train/all_combined/combined_6/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
235
+ - ../slimp/train/all_combined/combined_6/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
236
+ - ../slimp/train/all_combined/combined_6/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
237
+ - ../slimp/train/all_combined/combined_6/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
238
+ - ../slimp/train/all_combined/combined_6/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
239
+ - ../slimp/train/all_combined/combined_6/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
240
+ - ../slimp/train/all_combined/combined_6/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
241
+ - ../slimp/train/all_combined/combined_6/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
242
+ - ../slimp/train/all_combined/combined_6/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
243
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
244
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
245
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
246
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
247
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
248
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
249
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
250
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
251
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
252
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
253
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
254
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
255
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
256
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
257
+ - ../slimp/train/all_combined/combined_6/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
258
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
259
+ - ../slimp/train/all_combined/combined_6/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
260
+ - ../slimp/train/all_combined/combined_6/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
261
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
262
+ - ../slimp/train/all_combined/combined_6/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
263
+ - ../slimp/train/all_combined/combined_7/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
264
+ - ../slimp/train/all_combined/combined_7/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
265
+ - ../slimp/train/all_combined/combined_7/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
266
+ - ../slimp/train/all_combined/combined_7/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
267
+ - ../slimp/train/all_combined/combined_7/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
268
+ - ../slimp/train/all_combined/combined_7/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
269
+ - ../slimp/train/all_combined/combined_7/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
270
+ - ../slimp/train/all_combined/combined_7/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
271
+ - ../slimp/train/all_combined/combined_7/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
272
+ - ../slimp/train/all_combined/combined_7/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
273
+ - ../slimp/train/all_combined/combined_7/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
274
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
275
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
276
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
277
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
278
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
279
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
280
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
281
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
282
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
283
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
284
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
285
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
286
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
287
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
288
+ - ../slimp/train/all_combined/combined_7/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
289
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
290
+ - ../slimp/train/all_combined/combined_7/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
291
+ - ../slimp/train/all_combined/combined_7/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
292
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
293
+ - ../slimp/train/all_combined/combined_7/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
294
+ - ../slimp/train/all_combined/combined_8/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
295
+ - ../slimp/train/all_combined/combined_8/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
296
+ - ../slimp/train/all_combined/combined_8/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
297
+ - ../slimp/train/all_combined/combined_8/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
298
+ - ../slimp/train/all_combined/combined_8/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
299
+ - ../slimp/train/all_combined/combined_8/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
300
+ - ../slimp/train/all_combined/combined_8/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
301
+ - ../slimp/train/all_combined/combined_8/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
302
+ - ../slimp/train/all_combined/combined_8/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
303
+ - ../slimp/train/all_combined/combined_8/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
304
+ - ../slimp/train/all_combined/combined_8/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
305
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
306
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
307
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
308
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
309
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
310
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
311
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
312
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
313
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
314
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
315
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
316
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
317
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
318
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
319
+ - ../slimp/train/all_combined/combined_8/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
320
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
321
+ - ../slimp/train/all_combined/combined_8/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
322
+ - ../slimp/train/all_combined/combined_8/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
323
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
324
+ - ../slimp/train/all_combined/combined_8/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
325
+ - ../slimp/train/all_combined/combined_9/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
326
+ - ../slimp/train/all_combined/combined_9/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
327
+ - ../slimp/train/all_combined/combined_9/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
328
+ - ../slimp/train/all_combined/combined_9/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
329
+ - ../slimp/train/all_combined/combined_9/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
330
+ - ../slimp/train/all_combined/combined_9/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
331
+ - ../slimp/train/all_combined/combined_9/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
332
+ - ../slimp/train/all_combined/combined_9/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
333
+ - ../slimp/train/all_combined/combined_9/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
334
+ - ../slimp/train/all_combined/combined_9/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
335
+ - ../slimp/train/all_combined/combined_9/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
336
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
337
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
338
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
339
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
340
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
341
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
342
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
343
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
344
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
345
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
346
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
347
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
348
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
349
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
350
+ - ../slimp/train/all_combined/combined_9/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
351
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
352
+ - ../slimp/train/all_combined/combined_9/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
353
+ - ../slimp/train/all_combined/combined_9/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
354
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
355
+ - ../slimp/train/all_combined/combined_9/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
356
+ - ../slimp/train/all_combined/combined_10/redpajamaarxiv_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
357
+ - ../slimp/train/all_combined/combined_10/redpajamabook_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
358
+ - ../slimp/train/all_combined/combined_10/redpajamabook_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
359
+ - ../slimp/train/all_combined/combined_10/redpajamac4_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
360
+ - ../slimp/train/all_combined/combined_10/redpajamac4_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
361
+ - ../slimp/train/all_combined/combined_10/redpajamac4_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
362
+ - ../slimp/train/all_combined/combined_10/redpajamac4_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
363
+ - ../slimp/train/all_combined/combined_10/redpajamac4_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
364
+ - ../slimp/train/all_combined/combined_10/redpajamac4_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
365
+ - ../slimp/train/all_combined/combined_10/redpajamac4_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
366
+ - ../slimp/train/all_combined/combined_10/redpajamac4_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
367
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
368
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_10.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
369
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_11.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
370
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_12.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
371
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_13.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
372
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_14.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
373
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_15.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
374
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
375
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_3.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
376
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_4.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
377
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_5.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
378
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_6.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
379
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_7.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
380
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_8.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
381
+ - ../slimp/train/all_combined/combined_10/redpajamacommoncrawl_9.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
382
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
383
+ - ../slimp/train/all_combined/combined_10/redpajamagithub_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
384
+ - ../slimp/train/all_combined/combined_10/redpajamastackexchange_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
385
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_1.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
386
+ - ../slimp/train/all_combined/combined_10/redpajamawikipedia_2.jsonl.gz.tokenized.spectra.out.npy/0_00000.npy
387
+ memmap_dtype: uint16
388
+ datasets: null
389
+ label_mask_paths: null
390
+ pad_direction: right
391
+ generate_attention_mask: false
392
+ generate_doc_lengths: false
393
+ num_workers: 0
394
+ drop_last: true
395
+ pin_memory: true
396
+ prefetch_factor: 16
397
+ persistent_workers: true
398
+ timeout: 0
399
+ seed: null
400
+ instance_filter: null
401
+ restore_dataloader: true
402
+ fast_forward_batches: null
403
+ evaluators: []
404
+ eval_interval: 500
405
+ tokenizer:
406
+ identifier: ../spectra_tokenizer/tokenizer.json
407
+ truncate_direction: right
408
+ save_folder: checkpoints/floatlm_390M_300B/
409
+ remote_save_folder: null
410
+ canceled_check_interval: 50
411
+ save_interval: 500
412
+ save_interval_unsharded: 2000
413
+ save_interval_ephemeral: null
414
+ save_num_checkpoints_to_keep: 1
415
+ save_num_unsharded_checkpoints_to_keep: -1
416
+ save_overwrite: true
417
+ force_save_unsharded: false
418
+ no_pre_train_checkpoint: false
419
+ load_path: checkpoints/floatlm_390M_300B//latest
420
+ load_path_sharded_checkpointer: null
421
+ try_load_latest_save: false
422
+ reset_optimizer_state: false
423
+ reset_trainer_state: false
424
+ sharded_checkpointer: torch_legacy
425
+ new_style_checkpoints: null
426
+ max_duration: 150000
427
+ global_train_batch_size: 1024
428
+ device_train_batch_size: 4
429
+ device_train_microbatch_size: 4
430
+ device_eval_batch_size: 4
431
+ eval_subset_num_batches: -1
432
+ eval_on_load: false
433
+ device_train_grad_accum: 1
434
+ max_grad_norm: 1.0
435
+ max_grad_norm_ratio: null
436
+ precision: amp_bf16
437
+ wandb: null
438
+ speed_monitor:
439
+ window_size: 20
440
+ gpu_flops_available: null
441
+ console_log_interval: 1
442
+ gen1_gc_interval: 1
443
+ compile: null
444
+ distributed_strategy: fsdp
445
+ fsdp:
446
+ use_orig_params: true
447
+ sharding_strategy: _HYBRID_SHARD_ZERO2
448
+ wrapping_strategy: null
449
+ precision: pure
450
+ hybrid_sharding_num_model_replicas: null
451
+ ddp: null
452
+ softmax_auxiliary_loss: false
453
+ auxiliary_loss_multiplier: 0.0001
454
+ time_limit: null
455
+ extra_steps_after_cancel: 10
456
+ early_stopping_factor: null
457
+ save_data_indices: true
458
+ python_profiling: false
459
+ torch_profiling: false
460
+ stop_at: 150010
461
+ stop_after: null
462
+ activation_checkpointing: null
463
+ fused_loss: null
464
+ hf_datasets_cache_dir: null
465
+ module_outputs_save_steps: null
step66000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d5a6be0260a3cc023ab8a08811428f00329c8b7c2d09b8b4bfcfce8dff7fa1a
3
+ size 15500
step68000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48cc35396b2014f5617a04f6f2c15ab91da86d60104f62ff9963fe19ee828d33
3
+ size 15564
step78000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86754f8fd82fbca6cf8cdb72964cfd641e22c09d7676cb1cddfa3c4b236e6eb6
3
+ size 15756
step98000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bcf1d40907eca4170c399fe3134ae4b50694bdaf7042ba7a7268256e07a7f28
3
+ size 16076