# Hyperparameter set serialized as a Python OrderedDict: the tag below is
# applied to a single argument, a list of [key, value] pairs, so key order
# is preserved exactly as written.
#
# SECURITY: `!!python/object/apply` is a PyYAML-specific tag that calls a
# Python callable at load time. It is rejected by `yaml.safe_load` and needs
# an unsafe loader (e.g. `yaml.UnsafeLoader`), so only load this file from a
# trusted source.
#
# NOTE(review): the key names look like Stable-Baselines3 / RL Zoo off-policy
# settings (replay buffer, tau, ent_coef) -- confirm against the consumer.
!!python/object/apply:collections.OrderedDict
- - - batch_size
    - 2048
  - - buffer_size
    - 1000000
  - - ent_coef
    - auto
  - - gamma
    - 0.95
  - - learning_rate
    - 0.001
  - - learning_starts
    - 100
  - - n_timesteps
    - 5000000.0  # stored as a float, not an int
  - - normalize
    - true
  - - policy
    - MultiInputPolicy
  - - policy_kwargs
    # Plain string, not a YAML mapping; presumably evaluated by the consumer
    # to build the dict -- TODO confirm.
    - dict(net_arch=[512, 512, 512], n_critics=2)
  - - replay_buffer_class
    - HerReplayBuffer
  - - replay_buffer_kwargs
    # Plain string as well; same caveat as policy_kwargs.
    - dict( goal_selection_strategy='future', n_sampled_goal=4 )
  - - tau
    - 0.05