Whoops, gt vs. lt: the sequence-length filter should keep samples with len(input_ids) < cfg.sequence_len, not >
Browse files
src/axolotl/utils/data.py
CHANGED
@@ -213,7 +213,7 @@ def load_prepare_datasets(tokenizer, cfg, default_dataset_prepared_path):
|
|
213 |
[
|
214 |
d
|
215 |
for d in dataset
|
216 |
-
if len(d["input_ids"]) > cfg.sequence_len
|
217 |
and len(d["input_ids"]) > 0
|
218 |
and len(d["input_ids"]) == len(d["attention_mask"])
|
219 |
and len(d["input_ids"]) == len(d["labels"])
|
|
|
213 |
[
|
214 |
d
|
215 |
for d in dataset
|
216 |
+
if len(d["input_ids"]) < cfg.sequence_len
|
217 |
and len(d["input_ids"]) > 0
|
218 |
and len(d["input_ids"]) == len(d["attention_mask"])
|
219 |
and len(d["input_ids"]) == len(d["labels"])
|