# File size: 991 Bytes
# 4137f12
# (file-viewer line-number gutter 1-47 was captured here; not part of the config)
# Training run configuration for the experiment named in `exp_name`.
# NOTE(review): `local_run_dir` is a concrete, timestamped path, so this looks
# like a resolved per-run dump rather than a hand-edited template - confirm.
#
# The fields of `wandb`, `model` and `loss` must stay indented under their
# parent key: left flush at column 0, each parent parses as null and its
# fields become unrelated top-level keys.

# --- Run identity and batching ---
seed: 0
exp_name: pythia160m_hh_ga4_dpo
batch_size: 32
eval_batch_size: 32
debug: false
fsdp_port: 12355

# --- Data ---
datasets:
- hh

# --- Experiment tracking ---
wandb:
  enabled: true
  entity: pythia_dpo
  project: Pythia_LOM

# --- Local storage ---
# NOTE(review): presumably candidate cache roots tried in order - confirm
# against the code that reads `local_dirs`.
local_dirs:
- /scr-ssd
- /scr
- .cache

# --- Evaluation behaviour ---
sample_during_eval: false
n_eval_model_samples: 16
do_first_eval: true

# Output directory for this run (name embeds exp_name plus a timestamp).
local_run_dir: .cache/laura/pythia160m_hh_ga4_dpo_2023-07-18_14-44-17_392406

# --- Optimisation ---
lr: 1.0e-06
# Matches the `ga4` tag in exp_name.
gradient_accumulation_steps: 4
max_grad_norm: 10.0
max_length: 512
max_prompt_length: 256
n_epochs: 1
n_examples: null
n_eval_examples: 256
trainer: FSDPTrainer
optimizer: RMSprop
warmup_steps: 150
activation_checkpointing: false
eval_every: 20000
minimum_log_interval_secs: 1.0

# --- Model ---
model:
  name_or_path: EleutherAI/pythia-160m
  tokenizer_name_or_path: null
  # The path contains `sft`, so this is presumably the checkpoint of an
  # earlier supervised fine-tuning run used as the starting policy - confirm.
  archive: .cache/laura/pythia160m_hh_ga4_sft_2023-07-18_13-09-00_785022/step-159744/policy.pt
  block_name: GPTNeoXLayer
  # Policy and reference use different dtypes (float32 vs float16).
  policy_dtype: float32
  fsdp_policy_mp: null
  reference_dtype: float16

# --- Loss ---
loss:
  name: dpo
  beta: 0.1
  reference_free: false
# | (stray table delimiter left by the file viewer; not part of the config)