File size: 333 Bytes
d90b3a8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
import pytest
from megatron.tokenizer import train_tokenizer
@pytest.mark.cpu
def test_train_tokenizer():
    """Smoke-test the tokenizer training entry point on a small sample file.

    Builds an argv-style argument list, parses it with
    ``train_tokenizer.parse_args`` and hands the result to
    ``train_tokenizer.main``. There are no explicit assertions; the test
    passes when neither call raises.
    """
    # Flag/value pairs, flattened below into the flat list the parser receives.
    cli_options = (
        ("--json_input_dir", "./tests/data/enwik8_first100.txt"),
        # The output path is intentionally passed as an empty string.
        ("--tokenizer_output_path", ""),
    )
    argv = [token for pair in cli_options for token in pair]
    train_tokenizer.main(train_tokenizer.parse_args(argv))
|