# Hyperparameter set serialized as a Python collections.OrderedDict.
#
# Layout: the tag below applies collections.OrderedDict to one positional
# argument, which is a list of [key, value] pairs. Pair order is preserved.
#
# SECURITY: the `!!python/object/apply` tag makes the loader call a Python
# callable. yaml.safe_load rejects this tag; it only loads with an unsafe
# loader (e.g. yaml.unsafe_load / yaml.UnsafeLoader). Only load this file
# from a trusted source.
#
# NOTE(review): policy_kwargs and replay_buffer_kwargs are stored as plain
# strings that look like Python `dict(...)` expressions; presumably the
# consumer evaluates them -- confirm against the loading code.
!!python/object/apply:collections.OrderedDict
- - - batch_size
    - 2048
  - - buffer_size
    - 1000000
  - - ent_coef
    - auto
  - - gamma
    - 0.95
  - - learning_rate
    - 0.001
  - - learning_starts
    - 100
  - - n_timesteps
    - 5000000.0
  - - normalize
    - true
  - - policy
    - MultiInputPolicy
  - - policy_kwargs
    - dict(net_arch=[512, 512, 512], n_critics=2)
  - - replay_buffer_class
    - HerReplayBuffer
  - - replay_buffer_kwargs
    - dict( goal_selection_strategy='future', n_sampled_goal=4 )
  - - tau
    - 0.05