| !!python/object/apply:collections.OrderedDict | |
| - - - batch_size | |
| - 256 | |
| - - buffer_size | |
| - 200000 | |
| - - callback | |
| - - rl_zoo3.callbacks.ParallelTrainCallback: | |
| gradient_steps: 200 | |
| - rl_zoo3.callbacks.LapTimeCallback | |
| - - ent_coef | |
| - auto | |
| - - env_wrapper | |
| - - ae.wrapper.AutoencoderWrapper | |
| - rl_zoo3.wrappers.HistoryWrapper: | |
| horizon: 2 | |
| - - gamma | |
| - 0.99 | |
| - - gradient_steps | |
| - 256 | |
| - - learning_rate | |
| - 0.00073 | |
| - - learning_starts | |
| - 500 | |
| - - n_timesteps | |
| - 2000000.0 | |
| - - normalize | |
| - '{''norm_obs'': True, ''norm_reward'': False}' | |
| - - policy | |
| - MlpPolicy | |
| - - policy_kwargs | |
| - dict(log_std_init=-3, net_arch=[256, 256], n_critics=2, use_expln=True) | |
| - - sde_sample_freq | |
| - 16 | |
| - - tau | |
| - 0.02 | |
| - - train_freq | |
| - 200 | |
| - - use_sde | |
| - true | |
| - - use_sde_at_warmup | |
| - true | |