drop empty tokenized rows too (#509)
Browse files
src/axolotl/utils/trainer.py
CHANGED
@@ -361,7 +361,7 @@ def add_position_ids(sample):
|
|
361 |
|
362 |
|
363 |
# Pre-change side of the diff (the return line below is marked "-"):
# a sample passes only if len(sample["input_ids"]) is at most sequence_len.
# An empty "input_ids" has length 0 and therefore still passes this check.
def drop_long_seq(sample, sequence_len=2048):
|
364 |
-
return len(sample["input_ids"]) <= sequence_len
|
365 |
|
366 |
|
367 |
@contextmanager
|
|
|
361 |
|
362 |
|
363 |
def drop_long_seq(sample, sequence_len=2048):
    """Return True when ``sample`` has a usable number of input ids.

    A sample passes when ``sample["input_ids"]`` is non-empty and no longer
    than ``sequence_len``. Both over-long and empty tokenized rows fail.

    Args:
        sample: Mapping with an ``"input_ids"`` entry that supports ``len()``.
        sequence_len: Maximum allowed number of input ids (inclusive).

    Returns:
        bool: ``True`` if ``0 < len(sample["input_ids"]) <= sequence_len``,
        otherwise ``False``.
    """
    # Chained comparison measures the length once and also rejects empty rows.
    return 0 < len(sample["input_ids"]) <= sequence_len
|
365 |
|
366 |
|
367 |
@contextmanager
|