File size: 333 Bytes
d90b3a8
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import pytest
from megatron.tokenizer import train_tokenizer


@pytest.mark.cpu
def test_train_tokenizer():
    """Smoke-test the tokenizer training entry point end to end.

    Builds a command-line argument list, parses it with
    ``train_tokenizer.parse_args`` and hands the result to
    ``train_tokenizer.main``. There are no explicit assertions: the test
    passes as long as neither call raises.
    """
    corpus_path = "./tests/data/enwik8_first100.txt"
    # An empty output path is passed on purpose.
    # NOTE(review): presumably this makes the trainer skip writing the
    # tokenizer to disk -- confirm against train_tokenizer.main.
    output_path = ""

    train_tokenizer.main(
        train_tokenizer.parse_args(
            [
                "--json_input_dir",
                corpus_path,
                "--tokenizer_output_path",
                output_path,
            ]
        )
    )