File size: 223 Bytes
9b518d8
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
# Configuration that points at a base tokenizer and a text dataset.
# NOTE(review): the code that loads this file is not visible here; the
# per-key notes below are inferred from key names and values only and
# should be confirmed against the consumer.
# Implementation selector; "HF" presumably stands for Hugging Face - TODO confirm.
cls: HF
# Tokenizer to start from; looks like a Hugging Face Hub repo id - verify.
base_tokenizer_path: microsoft/Phi-3-mini-128k-instruct
# Source corpus. The path/data_dir/name/split keys resemble the arguments of
# Hugging Face `datasets.load_dataset`; confirm how the consumer passes them on.
dataset:
  # Presumably a Hub dataset id.
  path: allenai/c4
  # Presumably selects the French subset - TODO confirm this matches the
  # dataset repository's actual layout.
  data_dir: fr
  # NOTE(review): unclear whether this is a dataset config name or a local
  # label used by the consumer - confirm.
  name: c4_fr
  split: train
  # Presumably the record field that holds the raw text.
  column: text
# NOTE(review): "hyper token" is not defined in this file; presumably the
# number of such tokens to produce - confirm with the consumer.
target_num_hyper_token: 10
# Unit (documents, lines, ...) is not stated here - TODO confirm.
batch_size: 1000
# Presumably an upper bound on how much data is used in total; unit not
# stated here - TODO confirm.
total_training_size: 100000