cls: HF
base_tokenizer_path: microsoft/Phi-3-mini-128k-instruct
# NOTE(review): nesting under `dataset` was reconstructed after indentation was lost — confirm against the config loader.
dataset:
  path: allenai/c4
  data_dir: fr
  name: c4_fr
  split: train
  column: text
target_num_hyper_token: 10
batch_size: 1000
total_training_size: 100000
cls: HF
base_tokenizer_path: microsoft/Phi-3-mini-128k-instruct
# NOTE(review): nesting under `dataset` was reconstructed after indentation was lost — confirm against the config loader.
dataset:
  path: allenai/c4
  data_dir: fr
  name: c4_fr
  split: train
  column: text
target_num_hyper_token: 10
batch_size: 1000
total_training_size: 100000