{"run_name": "llama3_dpo_mathinstruct_base", "output_dir": "/scratch/toskov/mnlp/output", "data_path": "/scratch/toskov/mnlp/dpo_hf_dataset_nosysmsg.json", "model_id": "meta-llama/Meta-Llama-3-8B-Instruct", "pretrained_peft_path": "/scratch/toskov/mnlp/output/llama3_sft_mathinstruct_noquant_nosysmsg", "use_quantization": false, "batch_size": 1, "seed": 10107, "prompt_length": 402, "max_seq_length": 912}