Initial Commit
- README.md +2 -7
- args.yml +25 -7
- config.yml +1 -11
- replay.mp4 +2 -2
- results.json +1 -1
- sac-Pendulum-v1.zip +2 -2
- sac-Pendulum-v1/actor.optimizer.pth +2 -2
- sac-Pendulum-v1/critic.optimizer.pth +2 -2
- sac-Pendulum-v1/data +39 -42
- sac-Pendulum-v1/ent_coef_optimizer.pth +1 -1
- sac-Pendulum-v1/policy.pth +2 -2
- sac-Pendulum-v1/pytorch_variables.pth +1 -1
- train_eval_metrics.zip +2 -2
README.md
CHANGED
@@ -10,7 +10,7 @@ model-index:
   results:
   - metrics:
     - type: mean_reward
-      value: -
+      value: -176.33 +/- 101.55
       name: mean_reward
     task:
       type: reinforcement-learning
@@ -50,13 +50,8 @@ python -m utils.push_to_hub --algo sac --env Pendulum-v1 -f logs/ -orga sb3
 
 ## Hyperparameters
 ```python
-OrderedDict([('
-             ('learning_rate', 0.001),
-             ('n_episodes_rollout', 1),
+OrderedDict([('learning_rate', 0.001),
              ('n_timesteps', 20000),
              ('policy', 'MlpPolicy'),
-             ('policy_kwargs', 'dict(log_std_init=-2, net_arch=[64, 64])'),
-             ('train_freq', -1),
-             ('use_sde', True),
              ('normalize', False)])
 ```
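As an aside, a minimal sketch (not part of this commit) of training a comparable agent directly with stable-baselines3 using the hyperparameters above; the `normalize` entry is an rl-baselines3-zoo option (disabled here, so no VecNormalize wrapper) rather than an SAC constructor argument:

```python
# Minimal sketch, assuming stable-baselines3 and a Gym install providing Pendulum-v1.
from stable_baselines3 import SAC

model = SAC(
    policy="MlpPolicy",       # ('policy', 'MlpPolicy')
    env="Pendulum-v1",
    learning_rate=0.001,      # ('learning_rate', 0.001)
    verbose=1,
)
model.learn(total_timesteps=20_000)  # ('n_timesteps', 20000)
model.save("sac-Pendulum-v1")
```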
args.yml
CHANGED
@@ -1,24 +1,28 @@
 !!python/object/apply:collections.OrderedDict
 - - - algo
     - sac
+  - - device
+    - auto
   - - env
-    - Pendulum-
+    - Pendulum-v1
   - - env_kwargs
     - null
   - - eval_episodes
     - 5
   - - eval_freq
-    -
+    - 25000
   - - gym_packages
     - []
   - - hyperparams
     - null
   - - log_folder
-    -
+    - logs/
   - - log_interval
     - -1
+  - - n_eval_envs
+    - 1
   - - n_evaluations
-    -
+    - null
   - - n_jobs
     - 1
   - - n_startup_trials
@@ -26,9 +30,13 @@
   - - n_timesteps
     - -1
   - - n_trials
-    -
+    - 500
+  - - no_optim_plots
+    - false
   - - num_threads
-    -
+    - -1
+  - - optimization_log_path
+    - null
   - - optimize_hyperparameters
     - false
   - - pruner
@@ -40,16 +48,26 @@
   - - save_replay_buffer
     - false
   - - seed
-    -
+    - 561540835
   - - storage
     - null
   - - study_name
     - null
   - - tensorboard_log
     - ''
+  - - track
+    - false
   - - trained_agent
     - ''
+  - - truncate_last_trajectory
+    - true
   - - uuid
     - false
+  - - vec_env
+    - dummy
   - - verbose
     - 1
+  - - wandb_entity
+    - null
+  - - wandb_project_name
+    - sb3
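Both args.yml and config.yml (below) are dumped with the `!!python/object/apply:collections.OrderedDict` tag, so `yaml.safe_load` refuses them; a sketch of reading them back (PyYAML >= 5.1, trusted files only):

```python
# Sketch, not part of the commit: loading args.yml back into Python.
# The python-specific YAML tag requires the unsafe loader, so only use it on trusted files.
import yaml

with open("args.yml") as f:
    args = yaml.unsafe_load(f)   # -> collections.OrderedDict

print(args["algo"], args["env"], args["seed"])  # sac Pendulum-v1 561540835
```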
config.yml
CHANGED
@@ -1,17 +1,7 @@
 !!python/object/apply:collections.OrderedDict
-- - -
-    - -1
-  - - learning_rate
+- - - learning_rate
     - 0.001
-  - - n_episodes_rollout
-    - 1
   - - n_timesteps
     - 20000
   - - policy
     - MlpPolicy
-  - - policy_kwargs
-    - dict(log_std_init=-2, net_arch=[64, 64])
-  - - train_freq
-    - -1
-  - - use_sde
-    - true
replay.mp4
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9459b5b8d9aef043e3253027a511d82e40803247f38c1033492925045b67a966
+size 195933
results.json
CHANGED
@@ -1 +1 @@
-{"mean_reward": -
+{"mean_reward": -176.32888, "std_reward": 101.54794864862134, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2022-05-22T22:23:07.885400"}
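The numbers in results.json are what stable-baselines3's evaluation helper reports over 10 deterministic episodes; a sketch of reproducing such an evaluation, assuming the checkpoint is available locally and a Gym version that provides Pendulum-v1:

```python
# Sketch, not part of the commit: computing mean/std reward as stored in results.json.
import gym
from stable_baselines3 import SAC
from stable_baselines3.common.evaluation import evaluate_policy

model = SAC.load("sac-Pendulum-v1.zip")
env = gym.make("Pendulum-v1")
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10, deterministic=True)
print({"mean_reward": mean_reward, "std_reward": std_reward})
```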
sac-Pendulum-v1.zip
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:2db35a1adf90a69633940e71bad1f481edc75208342d9fa18509f418fadaa51c
+size 3005853
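sac-Pendulum-v1.zip is the complete SB3 checkpoint; a sketch of fetching and loading it, where the repo id `sb3/sac-Pendulum-v1` is an assumption inferred from the push command in the README:

```python
# Sketch, not part of the commit: download the checkpoint from the Hub and load it.
# The repo_id below is assumed from the `-orga sb3` push command in the README.
from huggingface_hub import hf_hub_download
from stable_baselines3 import SAC

checkpoint = hf_hub_download(repo_id="sb3/sac-Pendulum-v1", filename="sac-Pendulum-v1.zip")
model = SAC.load(checkpoint)
action, _ = model.predict(model.observation_space.sample(), deterministic=True)
```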
sac-Pendulum-v1/actor.optimizer.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:4cddb9ae9fe6a55c343cc452511559ee5c87394a3febfa996ae5234cd6d4650b
+size 542837
sac-Pendulum-v1/critic.optimizer.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:017ac97f65c5e7ce0133454a8fc1cbf9addecb4ac180d21939f0e1b8fd38e875
+size 1083485
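The *.optimizer.pth files above (like policy.pth and ent_coef_optimizer.pth further down) are PyTorch-serialized objects written by SB3's save(); a sketch of opening one for inspection, assuming the file has been fetched via git-lfs or hf_hub_download:

```python
# Sketch, not part of the commit: peek at a serialized optimizer state.
import torch

state = torch.load("sac-Pendulum-v1/actor.optimizer.pth", map_location="cpu")
print(type(state))
```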
sac-Pendulum-v1/data
CHANGED
@@ -4,51 +4,46 @@
":serialized:": "gAWVMAAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLnNhYy5wb2xpY2llc5SMCVNBQ1BvbGljeZSTlC4=",
"__module__": "stable_baselines3.sac.policies",
"__doc__": "\n Policy class (with both actor and critic) for SAC.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param sde_net_arch: Network architecture for extracting features\n when using gSDE. If None, the latent features from the policy will be used.\n Pass an empty list to use the states as features.\n :param use_expln: Use ``expln()`` function instead of ``exp()`` when using gSDE to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param clip_mean: Clip the mean output when using gSDE to avoid numerical instability.\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n :param n_critics: Number of critic networks to create.\n :param share_features_extractor: Whether to share or not the features extractor\n between the actor and the critic (this saves computation time)\n ",
-"__init__": "<function SACPolicy.__init__ at
-"_build": "<function SACPolicy._build at
-"_get_constructor_parameters": "<function SACPolicy._get_constructor_parameters at
-"reset_noise": "<function SACPolicy.reset_noise at
-"make_actor": "<function SACPolicy.make_actor at
-"make_critic": "<function SACPolicy.make_critic at
-"forward": "<function SACPolicy.forward at
-"_predict": "<function SACPolicy._predict at
-"set_training_mode": "<function SACPolicy.set_training_mode at
"__abstractmethods__": "frozenset()",
-"_abc_impl": "<_abc_data object at
},
"verbose": 1,
"policy_kwargs": {
-"
-"net_arch": [
-64,
-64
-],
-"use_sde": true
},
"observation_space": {
":type:": "<class 'gym.spaces.box.Box'>",
-":serialized:": "
"dtype": "float32",
"low": "[-1. -1. -8.]",
"high": "[1. 1. 8.]",
"bounded_below": "[ True True True]",
"bounded_above": "[ True True True]",
-"
-3
-],
-"_np_random": "RandomState(MT19937)"
},
"action_space": {
":type:": "<class 'gym.spaces.box.Box'>",
-
":serialized:": "gAWV4QsAAAAAAACMDmd5bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lGgFk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////
"dtype": "float32",
"low": "[-2.]",
"high": "[2.]",
"bounded_below": "[ True]",
"bounded_above": "[ True]",
-"_shape": [
-1
-],
"_np_random": "RandomState(MT19937)"
},
"n_envs": 1,
@@ -57,62 +52,64 @@
"_num_timesteps_at_start": 0,
"seed": 0,
"action_noise": null,
-"start_time":
"learning_rate": {
":type:": "<class 'function'>",
-":serialized:": "
},
"tensorboard_log": null,
"lr_schedule": {
":type:": "<class 'function'>",
-":serialized:": "
},
"_last_obs": null,
-"_last_episode_starts":
"_last_original_obs": {
":type:": "<class 'numpy.ndarray'>",
-":serialized:": "
},
"_episode_num": 100,
-"use_sde":
"sde_sample_freq": -1,
"_current_progress_remaining": 0.0,
"ep_info_buffer": {
":type:": "<class 'collections.deque'>",
-
":serialized:": "gAWVHRAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpSMFW51bXB5LmNvcmUubXVsdGlhcnJheZSMBnNjYWxhcpSTlIwFbnVtcHmUjAVkdHlwZZSTlIwCZjiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////
},
"ep_success_buffer": {
":type:": "<class 'collections.deque'>",
":serialized:": "gAWVIAAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKULg=="
},
-"_n_updates":
"buffer_size": 1,
"batch_size": 256,
"learning_starts": 100,
"tau": 0.005,
"gamma": 0.99,
-"gradient_steps":
"optimize_memory_usage": false,
"replay_buffer_class": {
":type:": "<class 'abc.ABCMeta'>",
":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==",
"__module__": "stable_baselines3.common.buffers",
"__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device:\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
-"__init__": "<function ReplayBuffer.__init__ at
-"add": "<function ReplayBuffer.add at
-"sample": "<function ReplayBuffer.sample at
-"_get_samples": "<function ReplayBuffer._get_samples at
"__abstractmethods__": "frozenset()",
-"_abc_impl": "<_abc_data object at
},
"replay_buffer_kwargs": {},
"train_freq": {
":type:": "<class 'stable_baselines3.common.type_aliases.TrainFreq'>",
-":serialized:": "
},
"use_sde_at_warmup": false,
"target_entropy": -1.0,
"ent_coef": "auto",
-"target_update_interval": 1
-"n_episodes_rollout": 1
}
":serialized:": "gAWVMAAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLnNhYy5wb2xpY2llc5SMCVNBQ1BvbGljeZSTlC4=",
"__module__": "stable_baselines3.sac.policies",
"__doc__": "\n Policy class (with both actor and critic) for SAC.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param sde_net_arch: Network architecture for extracting features\n when using gSDE. If None, the latent features from the policy will be used.\n Pass an empty list to use the states as features.\n :param use_expln: Use ``expln()`` function instead of ``exp()`` when using gSDE to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param clip_mean: Clip the mean output when using gSDE to avoid numerical instability.\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n :param n_critics: Number of critic networks to create.\n :param share_features_extractor: Whether to share or not the features extractor\n between the actor and the critic (this saves computation time)\n ",
+"__init__": "<function SACPolicy.__init__ at 0x7f5e9b6a1b00>",
+"_build": "<function SACPolicy._build at 0x7f5e9b6a1b90>",
+"_get_constructor_parameters": "<function SACPolicy._get_constructor_parameters at 0x7f5e9b6a1c20>",
+"reset_noise": "<function SACPolicy.reset_noise at 0x7f5e9b6a1cb0>",
+"make_actor": "<function SACPolicy.make_actor at 0x7f5e9b6a1d40>",
+"make_critic": "<function SACPolicy.make_critic at 0x7f5e9b6a1dd0>",
+"forward": "<function SACPolicy.forward at 0x7f5e9b6a1e60>",
+"_predict": "<function SACPolicy._predict at 0x7f5e9b6a1ef0>",
+"set_training_mode": "<function SACPolicy.set_training_mode at 0x7f5e9b6a1f80>",
"__abstractmethods__": "frozenset()",
+"_abc_impl": "<_abc_data object at 0x7f5e9b68e8d0>"
},
"verbose": 1,
"policy_kwargs": {
+"use_sde": false
},
"observation_space": {
":type:": "<class 'gym.spaces.box.Box'>",
+
":serialized:": "gAWVbQEAAAAAAACMDmd5bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lGgFk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMBl9zaGFwZZRLA4WUjANsb3eUjBJudW1weS5jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWDAAAAAAAAAAAAIC/AACAvwAAAMGUaApLA4WUjAFDlHSUUpSMBGhpZ2iUaBIolgwAAAAAAAAAAACAPwAAgD8AAABBlGgKSwOFlGgVdJRSlIwNYm91bmRlZF9iZWxvd5RoEiiWAwAAAAAAAAABAQGUaAeMAmIxlImIh5RSlChLA4wBfJROTk5K/////0r/////SwB0lGJLA4WUaBV0lFKUjA1ib3VuZGVkX2Fib3ZllGgSKJYDAAAAAAAAAAEBAZRoIUsDhZRoFXSUUpSMCl9ucF9yYW5kb22UTnViLg==",
"dtype": "float32",
+"_shape": [
+3
+],
"low": "[-1. -1. -8.]",
"high": "[1. 1. 8.]",
"bounded_below": "[ True True True]",
"bounded_above": "[ True True True]",
+"_np_random": null
},
"action_space": {
":type:": "<class 'gym.spaces.box.Box'>",
+
":serialized:": "gAWV4QsAAAAAAACMDmd5bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lGgFk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMBl9zaGFwZZRLAYWUjANsb3eUjBJudW1weS5jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWBAAAAAAAAAAAAADAlGgKSwGFlIwBQ5R0lFKUjARoaWdolGgSKJYEAAAAAAAAAAAAAECUaApLAYWUaBV0lFKUjA1ib3VuZGVkX2JlbG93lGgSKJYBAAAAAAAAAAGUaAeMAmIxlImIh5RSlChLA4wBfJROTk5K/////0r/////SwB0lGJLAYWUaBV0lFKUjA1ib3VuZGVkX2Fib3ZllGgSKJYBAAAAAAAAAAGUaCFLAYWUaBV0lFKUjApfbnBfcmFuZG9tlIwUbnVtcHkucmFuZG9tLl9waWNrbGWUjBJfX3JhbmRvbXN0YXRlX2N0b3KUk5SMB01UMTk5MzeUhZRSlH2UKIwNYml0X2dlbmVyYXRvcpRoMIwFc3RhdGWUfZQojANrZXmUaBIolsAJAAAAAAAAAAAAgFPCs5yHA7WTcuyrW+jmsvLPtoHa1QbvYaExTaBrtczZE+YYn3SceS/IpRuAxHkBSZ4PQ+Rd4BiTkhNVzNRKKWTSCJW+NNCeRHJ6A/Ctvwpzm3s+6WCxBtp1A0ULbn3WFmrrBDRmg7fz9aUvae9CY0O1XPfCd1LMRkQ3LQiJbtCOrnf7GLaAT3ue+U8y7BLYuY5ehakZyq49di1nK0KAnsXuxx/1IgFdS88kD8wZUmREV5hwnQr1Ehe16VreO8T/Qc86sV+2h90z2FiJqqLNf3s/yZS3bA9DHzGZfRtgkKu3Bx0ZTN7I+466APXPqRreJf4gBqUW/NB248FO9cpD0wFaed9QV63NqpiFOs/RYeEwD8e32dZFRfi9SK5aLvuApJMu2LOfZweypHIkyPjeY5W+tsm2bdPmVoCAOiyi73cf5k0LQeJNWqZU/wuy/f8myghZ9qrjf+2JSJMaB9VNMXWmxuq4Dq0fkhzJr1ML7SgftfSG29O5koFUMozQL58gAzyX96ZMcpWbZ+3/zlaVhGln5egXC8MtIK6xIFCvh/vD/F1jLgYIp14MM597MuPmTpa+OaKek7bql9Cp8/0skhg5QSCvCaijm8wenxrfqLyRxDPCpS+L3isJC5LrjFgWnsdxQXVrJK8uaobJcTIJ5NrYYfA2l5gH27iPI9EqhzFtZJXiE4vXpH18f3kouYV9RowPzOtmYsbmstR/Mx/VY7E0XBmnMidL4dYTUXgxaDbFxWy3y6miL2yw0I2O09vPWV7LqbwMbthlU26lrLfnJDz88B+7y4pFCwvsHgCsMWq6pvroAF4Ms+++JnhzrL0GLrtfJ7667p42Vg78GirmKIRMFH0p6aLRPV4V/fclE3PLBj0InU315M5v7fDFj/IreJeFUhIAy5/BlvLdgwIfBMBWvyjhqGRBo2KmdiDAO9BPhdi6oGmZUCxTDjUyMd8rjeRdntTE+L9sHQUSvUfpRjKcSIjZXcjsMMSwwU0QzIXewD4nZg8EAZ72iHnChWveW7cB5EQRFE+YPvvyNAw4OvORF+DjDtmLUz6jNz0JbkuKbhIQEEVbpDRkoeIGA4HATymZeE4TX/hvhOxJfX5liXD9/Uon8OjJ/hhIrDNUYR2NOV2R1L+VuK/JA9o0izGPiCC9vi7UK1u4re8tvPrBInC6BH2DpMvWSRepdH0frFxGsH/kRB/S77USeMs38a+yorDaa9Wsb9WnuQg10vl087jhvwSZx77fGq52NV1h/UnZAjnqJOjCPBgQrY0wsSeSW6SOSdItAOupiWkVvSzHz+gopd/3FUUPbjdik7Vd6O6ycGOFwp4+wyZqq1MKHww5exJND6De8NB4fGBzsvKoT8O1fAC3Y2Z+3mLYwkCJXULx2zNSO90bQi0L0BhTF1AidNtqRkak72V+V1kos6m4F+kyZiOiwxfcHKWHmpQe3A8HpX6nacx0Zso0WyF/W/sOLo//2g0YD9koIjgKvw//f9Em4CvxNBUMnJYTVQJGysF4QMShnM6byaMNJhRVALf+X0+SgLJkEwLfKdBbKxjA035OEc2YKmUpvooVUUt+9U8d7cKRgNYKKPwCHyYfLJSQt4ZEdThgjeywDxgsGzPax5SklOLroSYn39feofatZDzJXevfPTHyi1ZLwpe6Hwkbqz1FuOnHiShPedbEA9b/HCtBytSZrgUwlwHpQlAiBTSxpN7TFzVZrrhRd6N8W+FeE9SAGCHwXchFR4SCGThPyRGO/XWkMPJ42BLUOmMGepDQgWH646tjoJSE3EXcA5iDS+Nq6Oh521oC2UPAnvxIj1QbVh8IbxlBytRTTjc0c14E9cyhIPlgIoHriuEFSMXSEzOGL1MmL6UCbiXfsRg9Z6OwWCCl3VeGg5bEZ1kjJkvs08k7wtPk4ATAjaTL3QoY2gf106zFbJtL4D5gmLMJ9OuzE2Fn5uaAqqpjXIqqEXxS9jtpsRU9VTHCg68RwXQVIUhuVJgHq8fOigBMrW7Am5+jjo/GNNlcFcp813dFiXy4qHhjGSEjNXp3ln03NZkOgqXQ8SalJlPOvyrAS9wW9EtjQKhcrBSWSsQ8C3o/Mc/sR/CMRB85ZIojR/tiCKtOutxQMIusIOnYHK8g6kPpTQ8J/PfJ8pa3GEoYoA67axQTXsysd2Y6ZDwpz1HkAeISVK2AlCcuQssrQv8dVLAcins/2kjRFp5Vp82HSX9j6Ci9GH5mkdyqV84vWsdwRz4JNXHZoHVZKnrxSdA1HUwRUI/5oWiqnGk9KyiS4Mv9dQIVluJ1+/pAHYEdG9YgLHiNE2zA7aIQbqqlGX6jH1CrHSPL9mnlHdPiKjozwRXu02UQuzlGJn+/PUkU6cPYLLeLc8e7S2qfCZxbdpHioand7wYKqb5bMb8dA3Dwvm6P6iJXSogJ+Q+0z43li8ydYJqZNZlSjsljr/2c5UU33vMhLDLEXIHT8WzrsS45TsKNbhYfTYx6Ds/8W5yOtiOPWc0+fRlAjbQC++FKo5UaMl2eRCxI4U5/heX2HxJGNk50rICjuFsG/8Q+NUuJgS+y3FMhe+sm1e2MdC+ldkBqEn8oRxajECbP4Wizz1tfJliW/1A5fdGTfUxM3HV72bJgnqswmoAumAx6d36KfuZwEEp0/wcrdo+8/unJ5f2mYeqCrOcaxDJrs6SxW1zVaH/YTZl+RNA0NjPLgaqnlveaes/MkpzsVEQDtvKGFrG1cnmGjZVi2azrSDGQ0Y423nEksDC5awYcOJmVYbeA4DkMSNfj+7Dx2SzH/PVPuXLX9aw9K9QF4Ml48zsSrwsVjIa8+gIdffs2pf2wCKcPtFez6vOT0UDuFHknJjMDg0fI3DnyC7jJqO8V4XpmPyarTp3JJRGhmqTHhpZInn70JMfS+RFry5+rLSOM0T+KWV8fYj
s9eyCiZijlR4AiADooXm9G8JIzZCLZX2Dty83iyz7gQzSxYO7ULuTT1stvGuJwbBP4LMhLXkbxdhAmBSDiYNOnc3O+yFsO6Ps9UOQD8S4Pbr8hZ4mFjbicpO635SwpmHINYDeuewln3/GHz69LpCjmpnKPeF9ZxXcq6MR4kJUV2j/dQzqjLniNaQmrMkULdI7W1sMXRFcsz9xs1GVwVqmtMVws8HtvXMYNmosCrrgAFX2ghPz7dXCV6vML5YhfNbDAzzG6MHffrslrhMav3vtlt8Fnld4VaH6IhMkowayT1lSVvfvlKHCWwtKaTcOZrR5LZGalJOpFbVIFUOAo+LnY/25bmc3KloyLzgiTudjPsXEGPNPBIvE/5cMEvU4Lrs0N3tCke4abYDXF9f14QrwLlGgHjAJ1NJSJiIeUUpQoSwNoC05OTkr/////Sv////9LAHSUYk1wAoWUaBV0lFKUjANwb3OUTXACdYwJaGFzX2dhdXNzlEsAjAVnYXVzc5RHAAAAAAAAAAB1YnViLg==",
"dtype": "float32",
+"_shape": [
+1
+],
"low": "[-2.]",
"high": "[2.]",
"bounded_below": "[ True]",
"bounded_above": "[ True]",
"_np_random": "RandomState(MT19937)"
},
"n_envs": 1,
"_num_timesteps_at_start": 0,
"seed": 0,
"action_noise": null,
+"start_time": 1653249455.1774566,
"learning_rate": {
":type:": "<class 'function'>",
+
":serialized:": "gAWV0QIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwNX2J1aWx0aW5fdHlwZZSTlIwKTGFtYmRhVHlwZZSFlFKUKGgCjAhDb2RlVHlwZZSFlFKUKEsBSwBLAUsBSxNDBIgAUwCUToWUKYwBX5SFlIxRL2hvbWUvYW50b25pbi9Eb2N1bWVudHMvZGxyL3JsL3RvcmNoeS1iYXNlbGluZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwEZnVuY5RLgEMCAAGUjAN2YWyUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flIxRL2hvbWUvYW50b25pbi9Eb2N1bWVudHMvZGxyL3JsL3RvcmNoeS1iYXNlbGluZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUjBxjbG91ZHBpY2tsZS5jbG91ZHBpY2tsZV9mYXN0lIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaCB9lH2UKGgXaA6MDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgYjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz9QYk3S8an8hZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"
},
"tensorboard_log": null,
"lr_schedule": {
":type:": "<class 'function'>",
+
":serialized:": "gAWV0QIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwNX2J1aWx0aW5fdHlwZZSTlIwKTGFtYmRhVHlwZZSFlFKUKGgCjAhDb2RlVHlwZZSFlFKUKEsBSwBLAUsBSxNDBIgAUwCUToWUKYwBX5SFlIxRL2hvbWUvYW50b25pbi9Eb2N1bWVudHMvZGxyL3JsL3RvcmNoeS1iYXNlbGluZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwEZnVuY5RLgEMCAAGUjAN2YWyUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flIxRL2hvbWUvYW50b25pbi9Eb2N1bWVudHMvZGxyL3JsL3RvcmNoeS1iYXNlbGluZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUjBxjbG91ZHBpY2tsZS5jbG91ZHBpY2tsZV9mYXN0lIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaCB9lH2UKGgXaA6MDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgYjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz9QYk3S8an8hZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"
},
"_last_obs": null,
+"_last_episode_starts": {
+":type:": "<class 'numpy.ndarray'>",
+
":serialized:": "gAWVdAAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYBAAAAAAAAAAGUjAVudW1weZSMBWR0eXBllJOUjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwGFlIwBQ5R0lFKULg=="
+},
"_last_original_obs": {
":type:": "<class 'numpy.ndarray'>",
+
":serialized:": "gAWVgQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMAAAAAAAAAFZTfz+1kJQ93bbgvZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJLAUsDhpSMAUOUdJRSlC4="
},
"_episode_num": 100,
+"use_sde": false,
"sde_sample_freq": -1,
"_current_progress_remaining": 0.0,
"ep_info_buffer": {
":type:": "<class 'collections.deque'>",
+
":serialized:": "gAWVHRAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpSMFW51bXB5LmNvcmUubXVsdGlhcnJheZSMBnNjYWxhcpSTlIwFbnVtcHmUjAVkdHlwZZSTlIwCZjiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYkMIsb/snqwHk8CUhpRSlIwBbJRLyIwBdJRHP/hAGB4D9wZ1fZQoaAZoCWgPQwgsZoS3B52SwJSGlFKUaBVLyGgWR0ARiw2VE/jbdX2UKGgGaAloD0MIBvTCnUvBm8CUhpRSlGgVS8hoFkdAHR6r/82rGXV9lChoBmgJaA9DCOZY3lVP8pTAlIaUUpRoFUvIaBZHQCcb2FnIyTJ1fZQoaAZoCWgPQwiMD7OXHTiWwJSGlFKUaBVLyGgWR0Av9XoTwlSkdX2UKGgGaAloD0MIqOSc2FMLlsCUhpRSlGgVS8hoFkdAM88e8wpOOHV9lChoBmgJaA9DCFWKHY3zPJLAlIaUUpRoFUvIaBZHQDh5/axoqTd1fZQoaAZoCWgPQwhU5BBx47OUwJSGlFKUaBVLyGgWR0A9nFOfukULdX2UKGgGaAloD0MIDksDP5pGksCUhpRSlGgVS8hoFkdAQVC3mV7hN3V9lChoBmgJaA9DCKvRqwGKeJHAlIaUUpRoFUvIaBZHQEOQ/nGKhtd1fZQoaAZoCWgPQwifyf552jaEwJSGlFKUaBVLyGgWR0BF7TK9wm3OdX2UKGgGaAloD0MI9DY2O3JMgsCUhpRSlGgVS8hoFkdASKIGKQ7tA3V9lChoBmgJaA9DCONsOgI4bmDAlIaUUpRoFUvIaBZHQEq2GKQ7tAt1fZQoaAZoCWgPQwiaC1we6xVwwJSGlFKUaBVLyGgWR0BNBQ4bS7XhdX2UKGgGaAloD0MIX0NwXEY+cMCUhpRSlGgVS8hoFkdAT8OQlruYyHV9lChoBmgJaA9DCDfg88NILnDAlIaUUpRoFUvIaBZHQFFCZid8Rcx1fZQoaAZoCWgPQwhaRuo9lZPxv5SGlFKUaBVLyGgWR0BSfnwb2lEadX2UKGgGaAloD0MIATW1bK1v/L+UhpRSlGgVS8hoFkdAU5k4ACGN73V9lChoBmgJaA9DCJj75ChAtADAlIaUUpRoFUvIaBZHQFStpVS4vvl1fZQoaAZoCWgPQwj/klSmmKtdwJSGlFKUaBVLyGgWR0BV6T9CNS62dX2UKGgGaAloD0MIwLFnz2Uq8L+UhpRSlGgVS8hoFkdAVzMfYBeXzHV9lChoBmgJaA9DCPWidr8KPl/AlIaUUpRoFUvIaBZHQFhJV2Rq46R1fZQoaAZoCWgPQwgHXi13ZoLpv5SGlFKUaBVLyGgWR0BZTzDGcWj5dX2UKGgGaAloD0MIPSzUmualXcCUhpRSlGgVS8hoFkdAWktAhStNjHV9lChoBmgJaA9DCLQglPfxnZXAlIaUUpRoFUvIaBZHQFtE4C6pYLd1fZQoaAZoCWgPQwiw5CoWv31ewJSGlFKUaBVLyGgWR0BcaPaQFLWadX2UKGgGaAloD0MI9uy5TI0CcMCUhpRSlGgVS8hoFkdAXhTqoqCpWHV9lChoBmgJaA9DCFEyObUzDPa/lIaUUpRoFUvIaBZHQF+ZFBY3eep1fZQoaAZoCWgPQwgwZeCAlrhfwJSGlFKUaBVLyGgWR0BgcqaNMoMKdX2UKGgGaAloD0MIJzCd1m2fXcCUhpRSlGgVS8hoFkdAYQv/9YOlPHV9lChoBmgJaA9DCCTTodPzVl3AlIaUUpRoFUvIaBZHQGGoz7di2Dx1fZQoaAZoCWgPQwiLqIk+H59cwJSGlFKUaBVLyGgWR0BiQ3ms/6frdX2UKGgGaAloD0MIjsni/iMwXsCUhpRSlGgVS8hoFkdAYtq0OVgQYnV9lChoBmgJaA9DCONTAIzXmJbAlIaUUpRoFUvIaBZHQGNxZ0KZ2IR1fZQoaAZoCWgPQwiKHvgYrGFewJSGlFKUaBVLyGgWR0BkD7SVnmJWdX2UKGgGaAloD0MIzqrP1XabhcCUhpRSlGgVS8hoFkdAZLfp+tr9EXV9lChoBmgJaA9DCKT6zi8K8nLAlIaUUpRoFUvIaBZHQGVXU8vEjxF1fZQoaAZoCWgPQwip3EQtzWV2wJSGlFKUaBVLyGgWR0Bl6StozvZzdX2UKGgGaAloD0MIXcMMjScqbcCUhpRSlGgVS8hoFkdAZng2ZRbbDnV9lChoBmgJaA9DCHGsi9toWV/AlIaUUpRoFUvIaBZHQGcHTKDCgsd1fZQoaAZoCWgPQwjgaMcNv+x2wJSGlFKUaBVLyGgWR0BnljXSSeRQdX2UKGgGaAloD0MIMxZNZ6dFb8CUhpRSlGgVS8hoFkdAaCv6DXe3yHV9lChoBmgJaA9DCGr7V1aad1/AlIaUUpRoFUvIaBZHQGjGrUkOZst1fZQoaAZoCWgPQwikMzDyspdxwJSGlFKUaBVLyGgWR0Bpau40/GEPdX2UKGgGaAloD0MIaqUQyCVbX8CUhpRSlGgVS8hoFkdAagYGVRk3CXV9lChoBmgJaA9DCKkVpu81ol7AlIaUUpRoFUvIaBZHQGqb97ngYP51fZQoaAZoCWgPQwg9RQ4RN0hewJSGlFKUaBVLyGgWR0BrLWQhfShKdX2UKGgGaAloD0MInu+nxks3AcCUhpRSlGgVS8hoFkdAa8WRNh3JP3V9lChoBmgJaA9DCDHO34TCYG3AlIaUUpRoFUvIaBZHQGxeTmwJPZZ1fZQoaAZoCWgPQwiCkCxgAjxwwJSGlFKUaBVLyGgWR0Bs9a5I6KcedX2UKGgGaAloD0MIqmQAqOKaXMCUhpRSlGgVS8hoFkdAbZrbWVeKK3V9lChoBmgJaA9DCGVW73A7VADAlIaUUpRoFUvIaBZHQG5B8yvcJt11fZQoaAZoCWgPQwg6svLLYKxdwJSGlFKUaBVLyGgWR0Bu5LPhQ3xXdX2UKGgGaAloD0MIvTlcqz1lXsCUhpRSlGgVS8hoFkdAb38nxaxHG3V9lChoBmgJaA9DCDAt6pPch17AlIaUUpRoFUvIaBZHQHANEy1uzhR1fZQoaAZoCWgPQwinQdE8APNrwJSGlFKUaBVLyGgWR0BwXMAdXDFZdX2UKGgGaAloD0MIaVGf5A78XcCUhpRSlGgVS8hoFkdAcKf8UmD15HV9lChoBmgJaA9DCJYjZCDPy13AlIaUUpRoFUvIaBZHQHDrKrFOwgV1fZQoaAZoCWgPQwh2qRH6ma9ewJSGlFKUaBVLyGgWR0BxNTHHWBjGdX2UKGgGaAloD0MIWDz1SIP0XcCUhpRSlGgVS8hoFkdAcYQsD4gzQHV9lChoBmgJaA9DCGQGKuPfZw/AlIaUUpRoFUvIaBZHQHHH7fk3juN1fZQoaAZoCWgPQwibcRqiCsVewJSGlFKUaBVLyGgWR0ByCtiH6/IsdX2UKGgGaAloD0MIVWthFlpHbMCUhpRSlGgVS8hoFkdAckzazeGfw3
V9lChoBmgJaA9DCOBIoMGmqF7AlIaUUpRoFUvIaBZHQHKRGSEDhcZ1fZQoaAZoCWgPQwjgS+FBM6NzwJSGlFKUaBVLyGgWR0By1E+Y+jdpdX2UKGgGaAloD0MIDR07qMTjXcCUhpRSlGgVS8hoFkdAcxfOMVDa5HV9lChoBmgJaA9DCCOfVzx1GWzAlIaUUpRoFUvIaBZHQHNb/jXFtKt1fZQoaAZoCWgPQwguNxjqsDRfwJSGlFKUaBVLyGgWR0BzrMaBI4EPdX2UKGgGaAloD0MIFcYWghzjX8CUhpRSlGgVS8hoFkdAc/esPrfLtHV9lChoBmgJaA9DCDc3picssf6/lIaUUpRoFUvIaBZHQHQ//+S8rZt1fZQoaAZoCWgPQwhqF9NM9wZfwJSGlFKUaBVLyGgWR0B0iu2kSElFdX2UKGgGaAloD0MIQiRDjq38XcCUhpRSlGgVS8hoFkdAdNHjC53C9HV9lChoBmgJaA9DCP94r1qZ8P+/lIaUUpRoFUvIaBZHQHUYdkSVW0Z1fZQoaAZoCWgPQwjKqDKMO8puwJSGlFKUaBVLyGgWR0B1Xu3ocJdCdX2UKGgGaAloD0MItydIbHeQbMCUhpRSlGgVS8hoFkdAdab9Ujs2N3V9lChoBmgJaA9DCGWlSSnooV3AlIaUUpRoFUvIaBZHQHXvKqjrRjV1fZQoaAZoCWgPQwjUDn9NVsVswJSGlFKUaBVLyGgWR0B2MTYDklu4dX2UKGgGaAloD0MIjQxyF+GIbsCUhpRSlGgVS8hoFkdAdnN/FR51NnV9lChoBmgJaA9DCGkc6ndhCG3AlIaUUpRoFUvIaBZHQHa2eLm6oVF1fZQoaAZoCWgPQwitwfuqXMNcwJSGlFKUaBVLyGgWR0B2+zj1f3N+dX2UKGgGaAloD0MI5iFTPgRVXsCUhpRSlGgVS8hoFkdAdz9wTufEoHV9lChoBmgJaA9DCC/ej9svTF/AlIaUUpRoFUvIaBZHQHeCM7hegL91fZQoaAZoCWgPQwgsLo7KTZJfwJSGlFKUaBVLyGgWR0B3xOvStvGZdX2UKGgGaAloD0MItOidCjj/bcCUhpRSlGgVS8hoFkdAeBGejmCAc3V9lChoBmgJaA9DCHuH26HhL27AlIaUUpRoFUvIaBZHQHhl7aVUuL91fZQoaAZoCWgPQwhSYWwhyIEBwJSGlFKUaBVLyGgWR0B4reaoddVvdX2UKGgGaAloD0MIhuXPtwXoX8CUhpRSlGgVS8hoFkdAePLYqoZQ53V9lChoBmgJaA9DCO2b+6vH/l3AlIaUUpRoFUvIaBZHQHku18CxNZh1fZQoaAZoCWgPQwgShZZ1/9gEwJSGlFKUaBVLyGgWR0B5au0LMLWqdX2UKGgGaAloD0MI2quPh74bX8CUhpRSlGgVS8hoFkdAeazCKaXrt3V9lChoBmgJaA9DCOG04EVfYQjAlIaUUpRoFUvIaBZHQHntTbJwKjV1fZQoaAZoCWgPQwj0Fg/vOXD5v5SGlFKUaBVLyGgWR0B6J1/I8yN5dX2UKGgGaAloD0MIuoJtxBOFbMCUhpRSlGgVS8hoFkdAemI7qY7aI3V9lChoBmgJaA9DCKDiOPBq0F7AlIaUUpRoFUvIaBZHQHqfxLf1pTN1fZQoaAZoCWgPQwjmIVM+BPxewJSGlFKUaBVLyGgWR0B625p/PPcBdX2UKGgGaAloD0MIYhBYOTQebsCUhpRSlGgVS8hoFkdAexVsvqTr3XV9lChoBmgJaA9DCPgZFw4En2zAlIaUUpRoFUvIaBZHQHtP2ilBQep1fZQoaAZoCWgPQwiH+IctPWVtwJSGlFKUaBVLyGgWR0B7jGoP07KadX2UKGgGaAloD0MI1esWgXGLccCUhpRSlGgVS8hoFkdAe8ZvpyIYWXV9lChoBmgJaA9DCAwDllzF327AlIaUUpRoFUvIaBZHQHv/27aqS5l1ZS4="
},
"ep_success_buffer": {
":type:": "<class 'collections.deque'>",
":serialized:": "gAWVIAAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKULg=="
},
+"_n_updates": 19900,
"buffer_size": 1,
"batch_size": 256,
"learning_starts": 100,
"tau": 0.005,
"gamma": 0.99,
+"gradient_steps": 1,
"optimize_memory_usage": false,
"replay_buffer_class": {
":type:": "<class 'abc.ABCMeta'>",
":serialized:": "gAWVNQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwMUmVwbGF5QnVmZmVylJOULg==",
"__module__": "stable_baselines3.common.buffers",
"__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device:\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
+"__init__": "<function ReplayBuffer.__init__ at 0x7f5e9bb06cb0>",
+"add": "<function ReplayBuffer.add at 0x7f5e9bb06d40>",
+"sample": "<function ReplayBuffer.sample at 0x7f5e9bafc440>",
+"_get_samples": "<function ReplayBuffer._get_samples at 0x7f5e9bafc4d0>",
"__abstractmethods__": "frozenset()",
+"_abc_impl": "<_abc_data object at 0x7f5e9bb653f0>"
},
"replay_buffer_kwargs": {},
"train_freq": {
":type:": "<class 'stable_baselines3.common.type_aliases.TrainFreq'>",
+
":serialized:": "gAWVYQAAAAAAAACMJXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi50eXBlX2FsaWFzZXOUjAlUcmFpbkZyZXGUk5RLAWgAjBJUcmFpbkZyZXF1ZW5jeVVuaXSUk5SMBHN0ZXCUhZRSlIaUgZQu"
},
"use_sde_at_warmup": false,
"target_entropy": -1.0,
"ent_coef": "auto",
+"target_update_interval": 1
}
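The spaces and coefficients recorded in this data blob (Box observations of shape 3, Box actions of shape 1, gamma 0.99, tau 0.005, batch size 256) are also exposed as attributes of the loaded model; a quick sketch of inspecting them:

```python
# Sketch, not part of the commit: inspecting values that sac-Pendulum-v1/data records.
from stable_baselines3 import SAC

model = SAC.load("sac-Pendulum-v1.zip")
print(model.observation_space)                   # Box(3,): low [-1. -1. -8.], high [1. 1. 8.]
print(model.action_space)                        # Box(1,): low [-2.], high [2.]
print(model.gamma, model.tau, model.batch_size)  # 0.99 0.005 256
```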
sac-Pendulum-v1/ent_coef_optimizer.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:299ae9796b8c15f713fae5dc68ea0cf8695c52ba990aac945847bcc55a55801a
 size 1191
sac-Pendulum-v1/policy.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:756a5ba0a333d5e71d09c549d872a8a8d81470e1389ad88ac2c0f29c321b9058
+size 1357125
sac-Pendulum-v1/pytorch_variables.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6fd4c5341f109009e7b669a7c8e5026273c2f99c1d351e4e6b0a29a862f3bde9
 size 747
train_eval_metrics.zip
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:156083ebf36e40a969d6047de3ddb6180e74b53385b6726cd8d8289aa0b2c814
+size 2928