# Training run configuration for experiment `pythia_DPO_ga4`.
#
# NOTE(review): the nesting of the `wandb`, `model` and `loss` sections is
# reconstructed from key order (each section header has no value of its own
# and is followed by its settings) -- confirm against the consuming code.

# --- Run identity and batching ---
seed: 0
exp_name: pythia_DPO_ga4
batch_size: 32
eval_batch_size: 32
debug: false
fsdp_port: 12355

# Datasets used for the run.
datasets:
  - hh

# Weights & Biases logging settings.
wandb:
  enabled: true
  entity: pythia_dpo
  project: Pythia_LOM

# Candidate local directories; presumably tried in order -- TODO confirm.
local_dirs:
  - /scr-ssd
  - /scr
  - .cache

# --- Evaluation behaviour ---
sample_during_eval: false
n_eval_model_samples: 16
do_first_eval: true

# Output directory of this specific run (timestamped, machine-local path).
local_run_dir: .cache/laura/pythia_DPO_ga4_2023-07-16_20-48-32_998171

# --- Optimisation hyperparameters ---
lr: 1.0e-06
gradient_accumulation_steps: 4
max_grad_norm: 10.0
max_length: 512
max_prompt_length: 256
n_epochs: 1
# null here presumably means "no limit on training examples" -- TODO confirm.
n_examples: null
n_eval_examples: 256
trainer: FSDPTrainer
optimizer: RMSprop
warmup_steps: 150
activation_checkpointing: false
eval_every: 20000
minimum_log_interval_secs: 1.0

# Policy / reference model settings.
model:
  name_or_path: EleutherAI/pythia-70m
  tokenizer_name_or_path: null
  # Checkpoint file taken from a run directory named `pythia_SFT_ga4_...`.
  archive: .cache/laura/pythia_SFT_ga4_2023-07-16_16-50-13_244945/step-159744/policy.pt
  block_name: GPTNeoXLayer
  policy_dtype: float32
  fsdp_policy_mp: null
  reference_dtype: float16

# Loss function settings.
loss:
  name: dpo
  beta: 0.1
  reference_free: false
|