|
accelerator_kwargs/logging_dir: /content/results/R-best-fine-tuned-bart-base-full-ft-reward_short_sentences_and_words-2023-07-13T06-49-08/runs |
|
adap_kl_ctrl: true |
|
batch_size: 64 |
|
cliprange: 0.2 |
|
cliprange_value: 0.2 |
|
compare_steps: 100 |
|
early_stopping: false |
|
forward_batch_size: null |
|
gamma: 1 |
|
gradient_accumulation_steps: 1 |
|
horizon: 10000 |
|
init_kl_coef: 0.2 |
|
lam: 0.95 |
|
learning_rate: 1.0e-06 |
|
log_with: tensorboard |
|
max_grad_norm: null |
|
mini_batch_size: 1 |
|
model_name: R-best-fine-tuned-bart-base-full-ft-reward_short_sentences_and_words-2023-07-13T06-49-08 |
|
optimize_cuda_cache: false |
|
ppo_epochs: 4 |
|
push_to_hub_if_best_kwargs/commit_message: 'add: new best model' |
|
push_to_hub_if_best_kwargs/private: true |
|
push_to_hub_if_best_kwargs/repo_id: nlp-lab-2023-seq2seq/R-best-fine-tuned-bart-base-full-ft-reward_short_sentences_and_words-2023-07-13T06-49-08 |
|
remove_unused_columns: true |
|
seed: 0 |
|
steps: 10000 |
|
target: 6 |
|
target_kl: 0.1 |
|
total_ppo_epochs: 157 |
|
tracker_project_name: trl |
|
vf_coef: 0.1 |
|
|