# Experiment configuration dictionary for a PPO run on Pendulum-v1
# (see the 'type', 'env_id' and 'exp_name' entries below).
# NOTE(review): the code that consumes this dict is not visible here, so the
# grouping comments below are inferred from key names only -- confirm against
# the framework that reads this config.
exp_config = {
    # Policy identity and execution settings.
    'type': 'ppo',
    'on_policy': True,
    'cuda': True,
    'action_space': 'continuous',
    # Return / advantage estimation (presumably GAE -- TODO confirm).
    'discount_factor': 0.99,
    'gae_lambda': 0.95,
    # Optimisation schedule and optimiser settings.
    'epoch_per_collect': 10,
    'batch_size': 64,
    'learning_rate': 0.0003,
    'lr_scheduler': None,
    'weight_decay': 0,
    # Loss weighting, clipping and normalisation options.
    'value_weight': 0.5,
    'entropy_weight': 0.01,
    'clip_ratio': 0.2,
    'adv_norm': True,
    'value_norm': 'baseline',
    'ppo_param_init': True,
    'grad_norm': 0.5,
    # Data collection and evaluation.
    'n_sample': 400,
    'unroll_len': 1,
    'deterministic_eval': True,
    # Empty here; presumably filled with model defaults by the consumer.
    'model': {},
    # Bookkeeping: config class name, environment id and experiment name.
    'cfg_type': 'PPOFPolicyDict',
    'env_id': 'Pendulum-v1',
    'exp_name': 'Pendulum-v1-PPO',
}