duytrq committed on
Commit
071c42c
1 Parent(s): f4c8fd0

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +33 -0
config.yaml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ exp_name: 'stream-data-v1'
2
+
3
+ # Training dataset (from the HuggingFace Hub)
4
+ data_source: "MedCat/MedCAT-PT-v1"
5
+
6
+ # The base model (from HuggingFace model hub)
7
+ model_name: "Qwen/Qwen2.5-0.5B"
8
+
9
+ # Tokenizer
10
+ tokenizer_device: 'cpu' # 'cpu', 'cuda:0', 'cuda:1'
11
+ tokenizer_batch_size: 1_000
12
+ max_length: 512
13
+
14
+ # Checkpoints configuration
15
+ output_folder: "./checkpoints/MedCAT-PT" # Where to save checkpoints during training
16
+ save_total_limit: 2 # Limit on number of checkpoints to keep
17
+ save_model_to: "./checkpoints/MedCAT-PT/" # Where to save the last checkpoint + base_model + data_version
18
+ save_strategy: "steps" # Saving strategy (either 'steps' or 'epoch')
19
+ save_steps: 50_000 # Save model every ... steps
20
+
21
+ # Logging configuration
22
+ logging_dir: "./logs" # Directory for logs + base_model + data_version
23
+ logging_steps: 1_000 # Frequency of logging
24
+
25
+ # Training configuration
26
+ per_device_train_batch_size: 16 # Training batch size
27
+ per_device_eval_batch_size: 16 # Evaluation batch size
28
+ num_train_epochs: 1 # Number of epochs
29
+ # max_steps: 500 # Total training steps (or use num_train_epochs instead)
30
+ eval_steps: 100_000 # Evaluate every ... steps. Recommended to match logging_steps, although a different value is allowed
31
+ evaluation_samples: 200_000 # Number of samples used to evaluate the model during training
32
+ evaluation_strategy: "steps" # Evaluation strategy (either 'steps' or 'epoch')
33
+ seed: 3407 # Random seed for reproducibility