File size: 223 Bytes
9b518d8 |
1 2 3 4 5 6 7 8 9 10 11 12 |
# Tokenizer-related configuration: a base tokenizer, a text dataset source,
# and sizing parameters.
# NOTE(review): the code that consumes this file is not visible here; the
# per-key notes below are inferred from key names and should be confirmed.
cls: HF
base_tokenizer_path: microsoft/Phi-3-mini-128k-instruct

# The dataset fields must be indented under `dataset`. Left flush with it,
# `dataset` parses as null and these five keys become top-level entries.
dataset:
  path: allenai/c4
  data_dir: fr
  name: c4_fr
  split: train
  # presumably the dataset field that holds the raw text; verify against consumer
  column: text

target_num_hyper_token: 10
batch_size: 1000
total_training_size: 100000
|