esperesa committed on
Commit
06f2b5b
1 Parent(s): f6766a4

Initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ replay.mp4 filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -6,7 +6,7 @@ tags:
6
  - reinforcement-learning
7
  - stable-baselines3
8
  model-index:
9
- - name: A2C
10
  results:
11
  - task:
12
  type: reinforcement-learning
@@ -16,13 +16,13 @@ model-index:
16
  type: PandaPickAndPlace-v3
17
  metrics:
18
  - type: mean_reward
19
- value: -26.60 +/- 19.93
20
  name: mean_reward
21
  verified: false
22
  ---
23
 
24
- # **A2C** Agent playing **PandaPickAndPlace-v3**
25
- This is a trained model of a **A2C** agent playing **PandaPickAndPlace-v3**
26
  using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3).
27
 
28
  ## Usage (with Stable-baselines3)
 
6
  - reinforcement-learning
7
  - stable-baselines3
8
  model-index:
9
+ - name: TQC
10
  results:
11
  - task:
12
  type: reinforcement-learning
 
16
  type: PandaPickAndPlace-v3
17
  metrics:
18
  - type: mean_reward
19
+ value: -5.80 +/- 3.16
20
  name: mean_reward
21
  verified: false
22
  ---
23
 
24
+ # **TQC** Agent playing **PandaPickAndPlace-v3**
25
+ This is a trained model of a **TQC** agent playing **PandaPickAndPlace-v3**
26
  using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3).
27
 
28
  ## Usage (with Stable-baselines3)
config.json CHANGED
@@ -1 +1 @@
1
- {"policy_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVNwAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLnNhYy5wb2xpY2llc5SMEE11bHRpSW5wdXRQb2xpY3mUk5Qu", "__module__": "stable_baselines3.sac.policies", "__doc__": "\n Policy class (with both actor and critic) for SAC.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param use_expln: Use ``expln()`` function instead of ``exp()`` when using gSDE to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param clip_mean: Clip the mean output when using gSDE to avoid numerical instability.\n :param features_extractor_class: Features extractor to use.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n :param n_critics: Number of critic networks to create.\n :param share_features_extractor: Whether to share or not the features extractor\n between the actor and the critic (this saves computation time)\n ", "__init__": "<function MultiInputPolicy.__init__ at 0x7e1bf110d7e0>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x7e1bf1107040>"}, "verbose": 1, "policy_kwargs": {"net_arch": [512, 512, 512], "n_critics": 2, "use_sde": false}, "num_timesteps": 1000000, "_total_timesteps": 1000000, "_num_timesteps_at_start": 0, "seed": null, "action_noise": null, "start_time": 
1726843926512342379, "learning_rate": 0.001, "tensorboard_log": null, "_last_obs": null, "_last_episode_starts": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVdwAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYEAAAAAAAAAAEBAQGUjAVudW1weZSMBWR0eXBllJOUjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwSFlIwBQ5R0lFKULg=="}, "_last_original_obs": {":type:": "<class 'collections.OrderedDict'>", ":serialized:": "gAWViwIAAAAAAACMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojA1hY2hpZXZlZF9nb2FslIwSbnVtcHkuY29yZS5udW1lcmljlIwLX2Zyb21idWZmZXKUk5QoljAAAAAAAAAAuyhrPQZI/z1hwaM8ghpPPXWRADvQv6M86AIkPTeWEL5hwaM8r9+cvHPg5D3WwKM8lIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksESwOGlIwBQ5R0lFKUjAxkZXNpcmVkX2dvYWyUaAcoljAAAAAAAAAAdO86vSdx0D2lhkM+z1CYvCXkGL6Y3Tg+BdENPkGlnj2l4kk+NYfqvYLBhr1iwOU9lGgOSwRLA4aUaBJ0lFKUjAtvYnNlcnZhdGlvbpRoByiWMAEAAAAAAACUguE9NwNMvr2q+D1NLFY7bSrVveuC1r2F1qM9uyhrPQZI/z1hwaM8pqViNwPUbrePJDe6iAbFt5m7qLcgdXEttM9aMtOeZzIX3xO6GP8OPh/pmL6cNU08w2TWvQSIhD2rob6+ENCjPYIaTz11kQA70L+jPFbYjDiOOhs30yhRud6tqbda2xe2EBVPrOcyTC8dWlsuuMX7uAW4sj3ohbw91UV9PhLJBT56M0M+17jVvr9uizToAiQ9N5YQvmHBozyWMG+3DyliNzrjujik4ag3Ow7FN4BP9rGguqo0bJQ1NWIFFDr+oQI8ghZWvkExPz5gOQO+WMrlviskQD4eorwzr9+cvHPg5D3WwKM8WJiWtv/VGTj8mw+410YxNUjFN7es1mKtTcWTrk9bPzCLnMq4lGgOSwRLE4aUaBJ0lFKUdS4=", "achieved_goal": "[[ 0.05741189 0.12464909 0.01998967]\n [ 0.05056239 0.00196179 0.01998892]\n [ 0.04004183 -0.14119802 0.01998967]\n [-0.01914963 0.11175623 0.01998941]]", "desired_goal": "[[-0.04563852 0.10177832 0.19094332]\n [-0.01859322 -0.1493078 0.18053281]\n [ 0.13849266 0.07746363 0.19715364]\n [-0.1145157 -0.06579877 0.11218335]]", "observation": "[[ 1.10112339e-01 -1.99231014e-01 1.21419407e-01 3.26802139e-03\n -1.04084827e-01 -1.04741894e-01 7.99990073e-02 5.74118905e-02\n 1.24649093e-01 1.99896712e-02 1.35092178e-05 -1.42352683e-05\n -6.98634365e-04 -2.34872714e-05 -2.01145176e-05 1.37252709e-11\n 1.27364963e-08 1.34820821e-08 -5.64084796e-04]\n [ 
1.39644980e-01 -2.98653573e-01 1.25249885e-02 -1.04684375e-01\n 6.47125542e-02 -3.72327179e-01 7.99866915e-02 5.05623892e-02\n 1.96179491e-03 1.99889243e-02 6.71601592e-05 9.25235327e-06\n -1.99470014e-04 -2.02273332e-05 -2.26284328e-06 -2.94281613e-12\n 1.85717761e-10 4.98748716e-11 -1.20054407e-04]\n [ 8.72650519e-02 9.20522809e-02 2.47336701e-01 1.30649835e-01\n 1.90626055e-01 -4.17425841e-01 2.59713460e-07 4.00418341e-02\n -1.41198024e-01 1.99896712e-02 -1.42568224e-05 1.34802094e-05\n 8.91149539e-05 2.01322327e-05 2.34908566e-05 -7.16858040e-09\n 3.18007551e-07 6.76437367e-07 5.64655405e-04]\n [ 7.97319226e-03 -2.09070235e-01 1.86711326e-01 -1.28148556e-01\n -4.48809385e-01 1.87637970e-01 8.78392115e-08 -1.91496294e-02\n 1.11756228e-01 1.99894123e-02 -4.48808350e-06 3.66773420e-05\n -3.42391286e-05 6.60407238e-07 -1.09535831e-05 -1.28942794e-11\n -6.71983372e-11 6.96151525e-10 -9.66126900e-05]]"}, "_episode_num": 27339, "use_sde": false, "sde_sample_freq": -1, "_current_progress_remaining": 0.0, "_stats_window_size": 100, "ep_info_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": 
"gAWV4AsAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwBgAAAAAAACMAWyUSweMAXSUR0CzfjAwoLG8dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czfpx42S+ydX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzftJ1FH8TdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzfsIAKfFrdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czfy2gi/widX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czf50j1PFedX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czf9OB19v1dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czf8L8ejmCdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzgC8wco6TdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzgKBnezlcdX2UKGgGR8AiAAAAAAAAaAdLCmgIR0CzgNUwrUb2dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzgNnUpd8idX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzgMkmhM8HdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0CzgQZooNNKdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0CzgSPfTCtSdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0CzgUx95QgtdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzgTBxxT86dX2UKGgGR8AYAAAAAAAAaAdLB2gIR0CzgW/ykKu0dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzgcyEg4ffdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0CzgenRgJC0dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czgb4dU83ddX2UKGgGR8AcAAAAAAAAaAdLCGgIR0CzghAmzBykdX2UKGgGRwAAAAAAAAAAaAdLAWgIR0CzghTCUHIIdX2UKGgGR8AoAAAAAAAAaAdLDWgIR0CzglJgG8mKdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czgh9jgAIZdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czgl0BXCCSdX2UKGgGR8AkAAAAAAAAaAdLC2gIR0CzgleIEbHZdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0Czgozr7fpEdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzgrPdyksSdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czg1IrBj4IdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czg1s1jy4GdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czg5A+t8u0dX2UKGgGR8AUAAAAAAAAaAdLBmgIR0Czg3hyn1nNdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0Czg61aKUFCdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czg7NH+ZPVdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzhF5K3/gjdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0CzhI+k1uR+dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzhJgL7XQMdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzhM9NrTH9dX2UKGgGR8BGgAAAAAAAaAdLLmgIR0CzhMVndweedX2UKGgGR8AYAAAAAAAAaAdLB2gIR0CzhPfuPV/ddX2UKGgGR8AgAAAAAAAAaAdLCWgIR0CzhPlyBCladX2UKGgGR8AkAAAAAAAAaAdLC2gIR0CzhTjtXxOMdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzhbYbGWD6dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzhbtN34bkdX2UKGgGR8AyAAAAAAAAaAdLE2gIR0CzhiPMGHHndX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzhhkeQuEmdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzhlWaH9FXdX2UKGgGR8AqAAA
AAAAAaAdLDmgIR0CzhnGPYFq0dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czhs/HxSYPdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0CzhwDS1E3LdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzhzO4G2TgdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0CzhzA/5ckddX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czh3Kl+EytdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0Czh1zMRpUQdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czh44+jdpJdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0Czh4aTjebedX2UKGgGR8AoAAAAAAAAaAdLDWgIR0Czh309lmOEdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0Czh74Vh1DCdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0Czh6kka/ATdX2UKGgGR8AIAAAAAAAAaAdLBGgIR0Czh9ZLuhK2dX2UKGgGR8AmAAAAAAAAaAdLDGgIR0CziCAgDA8CdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CziKIkzGgjdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CziL4/eLvUdX2UKGgGRwAAAAAAAAAAaAdLAWgIR0CziMRS1maqdX2UKGgGRwAAAAAAAAAAaAdLAWgIR0CziMpVGTcJdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CziNkvf0mMdX2UKGgGR8A1AAAAAAAAaAdLFmgIR0CziSay0KJEdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CziUp4wAU+dX2UKGgGR8AkAAAAAAAAaAdLC2gIR0CziWZUT+NtdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzifUb5uZUdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzifoVZcLSdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzimE4//vOdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzinxkEs8QdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0CziqnPJJXhdX2UKGgGR8AiAAAAAAAAaAdLCmgIR0CzipPuw5eadX2UKGgGR8AiAAAAAAAAaAdLCmgIR0Czit1KPGQ0dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czivh1Tzd2dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzivvXPJJYdX2UKGgGR8AqAAAAAAAAaAdLDmgIR0Czi0HqNZNgdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0Czi28guAZsdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czi5Illbu/dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czi9isXBP9dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czi+7OmixndX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzjGGQfZEldX2UKGgGR8AkAAAAAAAAaAdLC2gIR0CzjCeLWI43dX2UKGgGR8AoAAAAAAAAaAdLDWgIR0CzjGqNlyzYdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzjIptzjm0dX2UKGgGR8AcAAAAAAAAaAdLCGgIR0CzjJWW+oLodX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzjNVB+nZTdX2UKGgGR8AIAAAAAAAAaAdLBGgIR0CzjOlmjCYUdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzjWpsTFl1dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzjZCyQgcMdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzjZshkiD/dX2UKGgGR8AkAAAAAAAAaAdLC2gIR0CzjcmBBiTddX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czje8pkPMCdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0Czjhzd56dEdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0CzjmpO8CgcdX2UKGgGR8BJAAA
AAAAAaAdLMmgIR0CzjpmsaKk3dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Czjsczl90BdWUu"}, "ep_success_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVhgAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKIiJiYmJiYmJiYmIiYmIiIiJiImIiYiIiImJiIiJiYmJiIiJiYiJiYiIiIiJiYiJiYiJiImIiYiJiIiIiIiIiYmIiImIiYiJiYmJiIiIiYmIiImJiYmIiImIiYiJiYmIiYiJiYllLg=="}, "_n_updates": 249750, "observation_space": {":type:": "<class 'gymnasium.spaces.dict.Dict'>", ":serialized:": "gAWVMgQAAAAAAACMFWd5bW5hc2l1bS5zcGFjZXMuZGljdJSMBERpY3SUk5QpgZR9lCiMBnNwYWNlc5SMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojA1hY2hpZXZlZF9nb2FslIwUZ3ltbmFzaXVtLnNwYWNlcy5ib3iUjANCb3iUk5QpgZR9lCiMBWR0eXBllIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYowNYm91bmRlZF9iZWxvd5SMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYDAAAAAAAAAAEBAZRoE4wCYjGUiYiHlFKUKEsDjAF8lE5OTkr/////Sv////9LAHSUYksDhZSMAUOUdJRSlIwNYm91bmRlZF9hYm92ZZRoHCiWAwAAAAAAAAABAQGUaCBLA4WUaCR0lFKUjAZfc2hhcGWUSwOFlIwDbG93lGgcKJYMAAAAAAAAAAAAIMEAACDBAAAgwZRoFksDhZRoJHSUUpSMBGhpZ2iUaBwolgwAAAAAAAAAAAAgQQAAIEEAACBBlGgWSwOFlGgkdJRSlIwIbG93X3JlcHKUjAUtMTAuMJSMCWhpZ2hfcmVwcpSMBDEwLjCUjApfbnBfcmFuZG9tlE51YowMZGVzaXJlZF9nb2FslGgNKYGUfZQoaBBoFmgZaBwolgMAAAAAAAAAAQEBlGggSwOFlGgkdJRSlGgnaBwolgMAAAAAAAAAAQEBlGggSwOFlGgkdJRSlGgsSwOFlGguaBwolgwAAAAAAAAAAAAgwQAAIMEAACDBlGgWSwOFlGgkdJRSlGgzaBwolgwAAAAAAAAAAAAgQQAAIEEAACBBlGgWSwOFlGgkdJRSlGg4jAUtMTAuMJRoOowEMTAuMJRoPE51YowLb2JzZXJ2YXRpb26UaA0pgZR9lChoEGgWaBloHCiWEwAAAAAAAAABAQEBAQEBAQEBAQEBAQEBAQEBlGggSxOFlGgkdJRSlGgnaBwolhMAAAAAAAAAAQEBAQEBAQEBAQEBAQEBAQEBAZRoIEsThZRoJHSUUpRoLEsThZRoLmgcKJZMAAAAAAAAAAAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMGUaBZLE4WUaCR0lFKUaDNoHCiWTAAAAAAAAAAAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBlGgWSxOFlGgkdJRSlGg4jAUtMTAuMJRoOowEMTAuMJRoPE51YnVoLE5oEE5oPE51Yi4=", "spaces": "OrderedDict([('achieved_goal', Box(-10.0, 10.0, (3,), float32)), 
('desired_goal', Box(-10.0, 10.0, (3,), float32)), ('observation', Box(-10.0, 10.0, (19,), float32))])", "_shape": null, "dtype": null, "_np_random": null}, "action_space": {":type:": "<class 'gymnasium.spaces.box.Box'>", ":serialized:": "gAWVawIAAAAAAACMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMDWJvdW5kZWRfYmVsb3eUjBJudW1weS5jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWBAAAAAAAAAABAQEBlGgIjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwSFlIwBQ5R0lFKUjA1ib3VuZGVkX2Fib3ZllGgRKJYEAAAAAAAAAAEBAQGUaBVLBIWUaBl0lFKUjAZfc2hhcGWUSwSFlIwDbG93lGgRKJYQAAAAAAAAAAAAgL8AAIC/AACAvwAAgL+UaAtLBIWUaBl0lFKUjARoaWdolGgRKJYQAAAAAAAAAAAAgD8AAIA/AACAPwAAgD+UaAtLBIWUaBl0lFKUjAhsb3dfcmVwcpSMBC0xLjCUjAloaWdoX3JlcHKUjAMxLjCUjApfbnBfcmFuZG9tlIwUbnVtcHkucmFuZG9tLl9waWNrbGWUjBBfX2dlbmVyYXRvcl9jdG9ylJOUjAVQQ0c2NJRoMowUX19iaXRfZ2VuZXJhdG9yX2N0b3KUk5SGlFKUfZQojA1iaXRfZ2VuZXJhdG9ylIwFUENHNjSUjAVzdGF0ZZR9lChoPYoR2cVXcJrDlqXIPYhO8uuvtQCMA2luY5SKELELOvwIjRhHhF4OXVxnPRJ1jApoYXNfdWludDMylEsAjAh1aW50ZWdlcpRLAHVidWIu", "dtype": "float32", "bounded_below": "[ True True True True]", "bounded_above": "[ True True True True]", "_shape": [4], "low": "[-1. -1. -1. -1.]", "high": "[1. 1. 1. 
1.]", "low_repr": "-1.0", "high_repr": "1.0", "_np_random": "Generator(PCG64)"}, "n_envs": 4, "buffer_size": 1000000, "batch_size": 2048, "learning_starts": 1000, "tau": 0.05, "gamma": 0.95, "gradient_steps": 1, "optimize_memory_usage": false, "replay_buffer_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVPwAAAAAAAACMJ3N0YWJsZV9iYXNlbGluZXMzLmhlci5oZXJfcmVwbGF5X2J1ZmZlcpSMD0hlclJlcGxheUJ1ZmZlcpSTlC4=", "__module__": "stable_baselines3.her.her_replay_buffer", "__annotations__": "{'env': typing.Optional[stable_baselines3.common.vec_env.base_vec_env.VecEnv]}", "__doc__": "\n Hindsight Experience Replay (HER) buffer.\n Paper: https://arxiv.org/abs/1707.01495\n\n Replay buffer for sampling HER (Hindsight Experience Replay) transitions.\n\n .. note::\n\n Compared to other implementations, the ``future`` goal sampling strategy is inclusive:\n the current transition can be used when re-sampling.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param env: The training environment\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n Disabled for now (see https://github.com/DLR-RM/stable-baselines3/pull/243#discussion_r531535702)\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n :param n_sampled_goal: Number of virtual transitions to create per real transition,\n by sampling new goals.\n :param goal_selection_strategy: Strategy for sampling goals for replay.\n One of ['episode', 'final', 'future']\n :param copy_info_dict: Whether to copy the info dictionary and pass it to\n ``compute_reward()`` method.\n Please note that the copy may cause a slowdown.\n False by default.\n ", "__init__": "<function HerReplayBuffer.__init__ 
at 0x7e1bf12f1ea0>", "__getstate__": "<function HerReplayBuffer.__getstate__ at 0x7e1bf12f1f30>", "__setstate__": "<function HerReplayBuffer.__setstate__ at 0x7e1bf12f1fc0>", "set_env": "<function HerReplayBuffer.set_env at 0x7e1bf12f2050>", "add": "<function HerReplayBuffer.add at 0x7e1bf12f20e0>", "_compute_episode_length": "<function HerReplayBuffer._compute_episode_length at 0x7e1bf12f2170>", "sample": "<function HerReplayBuffer.sample at 0x7e1bf12f2200>", "_get_real_samples": "<function HerReplayBuffer._get_real_samples at 0x7e1bf12f2290>", "_get_virtual_samples": "<function HerReplayBuffer._get_virtual_samples at 0x7e1bf12f2320>", "_sample_goals": "<function HerReplayBuffer._sample_goals at 0x7e1bf12f23b0>", "truncate_last_trajectory": "<function HerReplayBuffer.truncate_last_trajectory at 0x7e1bf12f2440>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x7e1bf12f7940>"}, "replay_buffer_kwargs": {}, "train_freq": {":type:": "<class 'stable_baselines3.common.type_aliases.TrainFreq'>", ":serialized:": "gAWVYQAAAAAAAACMJXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi50eXBlX2FsaWFzZXOUjAlUcmFpbkZyZXGUk5RLAWgAjBJUcmFpbkZyZXF1ZW5jeVVuaXSUk5SMBHN0ZXCUhZRSlIaUgZQu"}, "use_sde_at_warmup": false, "target_entropy": -4.0, "ent_coef": "auto", "target_update_interval": 1, "lr_schedule": {":type:": "<class 'function'>", ":serialized:": 
"gAWVrQMAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLA0sTQwx0AIgAfACDAYMBUwCUToWUjAVmbG9hdJSFlIwScHJvZ3Jlc3NfcmVtYWluaW5nlIWUjF4vaG9tZS9sdXlhbmcvbWluaWNvbmRhMy9lbnZzL3JsL2xpYi9weXRob24zLjEwL3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwIPGxhbWJkYT6US2FDAgwAlIwOdmFsdWVfc2NoZWR1bGWUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flIxeL2hvbWUvbHV5YW5nL21pbmljb25kYTMvZW52cy9ybC9saWIvcHl0aG9uMy4xMC9zaXRlLXBhY2thZ2VzL3N0YWJsZV9iYXNlbGluZXMzL2NvbW1vbi91dGlscy5weZR1Tk5oAIwQX21ha2VfZW1wdHlfY2VsbJSTlClSlIWUdJRSlGgAjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoIX2UfZQoaBhoD4wMX19xdWFsbmFtZV9flIwhZ2V0X3NjaGVkdWxlX2ZuLjxsb2NhbHM+LjxsYW1iZGE+lIwPX19hbm5vdGF0aW9uc19flH2UjA5fX2t3ZGVmYXVsdHNfX5ROjAxfX2RlZmF1bHRzX1+UTowKX19tb2R1bGVfX5RoGYwHX19kb2NfX5ROjAtfX2Nsb3N1cmVfX5RoAIwKX21ha2VfY2VsbJSTlGgCKGgHKEsBSwBLAEsBSwFLE0MEiABTAJRoCSmMAV+UhZRoDowEZnVuY5RLhUMCBAGUjAN2YWyUhZQpdJRSlGgVTk5oHSlSlIWUdJRSlGgjaD19lH2UKGgYaDRoJowZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5RoKH2UaCpOaCtOaCxoGWgtTmguaDBHP1BiTdLxqfyFlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMIWUUpSFlGhFXZRoR32UdYaUhlIwLg=="}, "batch_norm_stats": [], "batch_norm_stats_target": [], "system_info": {"OS": "Linux-6.8.0-41-generic-x86_64-with-glibc2.39 # 41-Ubuntu SMP PREEMPT_DYNAMIC Fri Aug 2 20:41:06 UTC 2024", "Python": "3.10.12", "Stable-Baselines3": "2.3.2", "PyTorch": "2.4.1+cu121", "GPU Enabled": "True", "Numpy": "1.26.4", "Cloudpickle": "3.0.0", "Gymnasium": "0.29.1", "OpenAI Gym": "0.26.2"}}
 
1
+ {"policy_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVMQAAAAAAAACMGHNiM19jb250cmliLnRxYy5wb2xpY2llc5SMEE11bHRpSW5wdXRQb2xpY3mUk5Qu", "__module__": "sb3_contrib.tqc.policies", "__doc__": "\n Policy class (with both actor and critic) for TQC.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param use_expln: Use ``expln()`` function instead of ``exp()`` when using gSDE to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param clip_mean: Clip the mean output when using gSDE to avoid numerical instability.\n :param features_extractor_class: Features extractor to use.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n :param n_quantiles: Number of quantiles for the critic.\n :param n_critics: Number of critic networks to create.\n :param share_features_extractor: Whether to share or not the features extractor\n between the actor and the critic (this saves computation time)\n ", "__init__": "<function MultiInputPolicy.__init__ at 0x715a28a5b0a0>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x715a28a57d00>"}, "verbose": 1, "policy_kwargs": {"net_arch": [512, 512, 512], "n_critics": 2, "use_sde": false}, "num_timesteps": 10000000, "_total_timesteps": 10000000, "_num_timesteps_at_start": 0, "seed": null, 
"action_noise": null, "start_time": 1726855976903048469, "learning_rate": 0.001, "tensorboard_log": null, "_last_obs": null, "_last_episode_starts": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVdwAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYEAAAAAAAAAAEBAQGUjAVudW1weZSMBWR0eXBllJOUjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwSFlIwBQ5R0lFKULg=="}, "_last_original_obs": {":type:": "<class 'collections.OrderedDict'>", ":serialized:": "gAWViwIAAAAAAACMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojA1hY2hpZXZlZF9nb2FslIwSbnVtcHkuY29yZS5udW1lcmljlIwLX2Zyb21idWZmZXKUk5QoljAAAAAAAAAAgnwcPUypq73iwKM8hNxrOrTny70WUqk8Qg2sPEXVAr4K16M8c3aevGtyGb4VwaM8lIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksESwOGlIwBQ5R0lFKUjAxkZXNpcmVkX2dvYWyUaAcoljAAAAAAAAAAEirrO+DXEb1CVjI+1b8hPQkvCj5Nn1w+s9PzPUxp+D20CqU9PGVBPRsKpr1XCho+lGgOSwRLA4aUaBJ0lFKUjAtvYnNlcnZhdGlvbpRoByiWMAEAAAAAAACOp3Q9j7CPvfoc9z0qbm2+4nccvyvnq78oc38ygnwcPUypq73iwKM8QJaWNls4Fbi07gG3yB/ZtTjGNzcPioC1YjkjMmrPSLjPn8q400SQO5FLpr1F5KI7lPMivP+93j2pwwW+aGKjPYTcazq058u9FlKpPLKJDT074484f4JbveD3aLvB+5I+SE+GPRcPWUCdiSO+YX/TwOpyHT0ZbBqsQyNKPgAAAAAAAACAAAAAAAAAAABCDaw8RdUCvgrXozwAAAAAAAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJmLU84xzLvDhgKj4cgVG/w4idv8GQhb+IdpEyc3aevGtyGb4VwaM85o2WNlpZAbj8JYK20o7Atm7GNzdse6q2MnVbMw4rhblyy8q4lGgOSwRLE4aUaBJ0lFKUdS4=", "achieved_goal": "[[ 0.03820468 -0.083819 0.01998943]\n [ 0.00089974 -0.09956303 0.02066902]\n [ 0.02100242 -0.12776668 0.02 ]\n [-0.01934359 -0.14985053 0.01998953]]", "desired_goal": "[[ 0.00717665 -0.03560627 0.17415717]\n [ 0.03948959 0.13494505 0.21545143]\n [ 0.11905613 0.12129459 0.08058682]\n [ 0.04721569 -0.08107396 0.15043007]]", "observation": "[[ 5.97301051e-02 -7.01609775e-02 1.20660737e-01 -2.31865555e-01\n -6.11204267e-01 -1.34299219e+00 1.48691370e-08 3.82046774e-02\n -8.38190019e-02 1.99894346e-02 4.48783976e-06 -3.55768534e-05\n -7.74457658e-06 -1.61770095e-06 1.09538014e-05 -9.57692350e-07\n 9.50089607e-09 
-4.78768852e-05 -9.66187727e-05]\n [ 4.40273574e-03 -8.11988190e-02 4.97105950e-03 -9.94576886e-03\n 1.08760826e-01 -1.30629197e-01 7.97775388e-02 8.99739796e-04\n -9.95630324e-02 2.06690244e-02 3.45551446e-02 6.86109634e-05\n -5.35912476e-02 -3.55481356e-03 2.87076980e-01 6.55809045e-02\n 3.39154601e+00 -1.59704641e-01 -6.60929918e+00]\n [ 3.84396687e-02 -2.19447225e-12 1.97400138e-01 0.00000000e+00\n -0.00000000e+00 0.00000000e+00 0.00000000e+00 2.10024156e-02\n -1.27766684e-01 1.99999996e-02 0.00000000e+00 -0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n [ 2.21672226e-02 -2.47940477e-02 1.66382670e-01 -8.18376303e-01\n -1.23073614e+00 -1.04348004e+00 1.69341234e-08 -1.93435904e-02\n -1.49850532e-01 1.99895296e-02 4.48686751e-06 -3.08392118e-05\n -3.87872387e-06 -5.73867237e-06 1.09538505e-05 -5.08076300e-06\n 5.10964995e-08 -2.53998151e-04 -9.67000524e-05]]"}, "_episode_num": 1178047, "use_sde": false, "sde_sample_freq": -1, "_current_progress_remaining": 0.0, "_stats_window_size": 100, "ep_info_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": 
"gAWV4AsAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwCAAAAAAAACMAWyUSwmMAXSUR0DrtVd5KODKdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtVWi9IwudX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtV35s0pFdX2UKGgGR8AuAAAAAAAAaAdLEGgIR0DrtVX67dzodX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtVyHi3ocdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtVsxPfsNdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtVk84giedX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtWKW5YozdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtWG0qpcYdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtWCQpWmxdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtWgXizcAdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtWYDSPU8dX2UKGgGR8AkAAAAAAAAaAdLC2gIR0DrtWDRRdhRdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtWVe5WildX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtWwwdsBRdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtWlqVQhwdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtWQzImw8dX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtWlvUjLTdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtXEAlOXWdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtW47FsHjdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtWmfzSThdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtXL2exwAdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtXaQlKK6dX2UKGgGR8AkAAAAAAAAaAdLC2gIR0DrtXEdHUc5dX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtW/WXC0odX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtXk0dilSdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtXafgaWHdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtXzEH+qBdX2UKGgGR8AiAAAAAAAAaAdLCmgIR0DrtXaVclgMdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtYJgzguRdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtX0LXtjTdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtXp8HfMwdX2UKGgGR8AkAAAAAAAAaAdLC2gIR0DrtYELEUCadX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtYZ7tzCDdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtYUToMa1dX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtYMmb9ZSdX2UKGgGR8AkAAAAAAAAaAdLC2gIR0DrtYHJTVDsdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtYyXgtOEdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtYfegte2dX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtYX0vGp/dX2UKGgGR8AiAAAAAAAAaAdLCmgIR0DrtYvh4t6HdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtZCU4aP0dX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtYvDm8ujdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtYp+tr9EdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtZHDIBBBdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtY/NZ/0/dX2UKGgGR8AiAAAAAAAAaAdLCmgIR0DrtZdIOH32dX2UKGgGR8AIAAAAAAAAaAdLBGgIR0DrtZR3eN1hdX2UKGgGR8AYAAA
AAAAAaAdLB2gIR0DrtY8++M6zdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtZUbayrxdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtZQB3A2ydX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtZzWFvhqdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtZoR8twrdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtZd4DcM3dX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtZrhLoOhdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtZ+wwCbMdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtaMzdDYzdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtZx5cC5mdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtaBrqt5ldX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtaO7UXpGdX2UKGgGR8AIAAAAAAAAaAdLBGgIR0DrtZ8bnX/YdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtaiAXEZSdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtaRXbM5fdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtaMW5YozdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtakS0Sh8dX2UKGgGR8AUAAAAAAAAaAdLBmgIR0Drtah8Ti84dX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtaaipvP1dX2UKGgGR8AiAAAAAAAAaAdLCmgIR0Drta9w3o9tdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtaytU4rCdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtbKlnAZbdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0Drta4La24NdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0Drtat2ZApsdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtbFrZ8KHdX2UKGgGR8AIAAAAAAAAaAdLBGgIR0DrtbClVtGedX2UKGgGR8AUAAAAAAAAaAdLBmgIR0Drta9vjOs1dX2UKGgGR8AcAAAAAAAAaAdLCGgIR0Drtbg50bLmdX2UKGgGR8AiAAAAAAAAaAdLCmgIR0Drtbgmqo60dX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtbRs41gqdX2UKGgGR8AiAAAAAAAAaAdLCmgIR0Drtbe6ClJpdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0Drtb3kFOfvdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0Drtb5mg8KYdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtbvR0EHMdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtcIB6KLsdX2UKGgGR8AiAAAAAAAAaAdLCmgIR0DrtbtAGB4EdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtcHE2HcldX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtcVTOPeYdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtcEvUz9CdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0Drtb/mwqy4dX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtcaQKa5PdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtcYMBp6AdX2UKGgGR8AiAAAAAAAAaAdLCmgIR0DrtcxDaXa8dX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtcTnanJldX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtczqL0jDdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtdIUvf0mdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtcylvZRLdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0Drtcq2l2vCdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtdH5IH1OdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtdYmGdqddX2UKGgGR8AUAAA
AAAAAaAdLBmgIR0Drtc7UhmoSdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtdIWmgrZdWUu"}, "ep_success_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVhgAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIhlLg=="}, "_n_updates": 2499750, "observation_space": {":type:": "<class 'gymnasium.spaces.dict.Dict'>", ":serialized:": "gAWVMgQAAAAAAACMFWd5bW5hc2l1bS5zcGFjZXMuZGljdJSMBERpY3SUk5QpgZR9lCiMBnNwYWNlc5SMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojA1hY2hpZXZlZF9nb2FslIwUZ3ltbmFzaXVtLnNwYWNlcy5ib3iUjANCb3iUk5QpgZR9lCiMBWR0eXBllIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYowNYm91bmRlZF9iZWxvd5SMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYDAAAAAAAAAAEBAZRoE4wCYjGUiYiHlFKUKEsDjAF8lE5OTkr/////Sv////9LAHSUYksDhZSMAUOUdJRSlIwNYm91bmRlZF9hYm92ZZRoHCiWAwAAAAAAAAABAQGUaCBLA4WUaCR0lFKUjAZfc2hhcGWUSwOFlIwDbG93lGgcKJYMAAAAAAAAAAAAIMEAACDBAAAgwZRoFksDhZRoJHSUUpSMBGhpZ2iUaBwolgwAAAAAAAAAAAAgQQAAIEEAACBBlGgWSwOFlGgkdJRSlIwIbG93X3JlcHKUjAUtMTAuMJSMCWhpZ2hfcmVwcpSMBDEwLjCUjApfbnBfcmFuZG9tlE51YowMZGVzaXJlZF9nb2FslGgNKYGUfZQoaBBoFmgZaBwolgMAAAAAAAAAAQEBlGggSwOFlGgkdJRSlGgnaBwolgMAAAAAAAAAAQEBlGggSwOFlGgkdJRSlGgsSwOFlGguaBwolgwAAAAAAAAAAAAgwQAAIMEAACDBlGgWSwOFlGgkdJRSlGgzaBwolgwAAAAAAAAAAAAgQQAAIEEAACBBlGgWSwOFlGgkdJRSlGg4jAUtMTAuMJRoOowEMTAuMJRoPE51YowLb2JzZXJ2YXRpb26UaA0pgZR9lChoEGgWaBloHCiWEwAAAAAAAAABAQEBAQEBAQEBAQEBAQEBAQEBlGggSxOFlGgkdJRSlGgnaBwolhMAAAAAAAAAAQEBAQEBAQEBAQEBAQEBAQEBAZRoIEsThZRoJHSUUpRoLEsThZRoLmgcKJZMAAAAAAAAAAAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMGUaBZLE4WUaCR0lFKUaDNoHCiWTAAAAAAAAAAAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBlGgWSxOFlGgkdJRSlGg4jAUtMTAuMJRoOowEMTAuMJRoPE51YnVoLE5oEE5oPE51Yi4=", "spaces": "OrderedDict([('achieved_goal', Box(-10.0, 10.0, (3,), float32)), 
('desired_goal', Box(-10.0, 10.0, (3,), float32)), ('observation', Box(-10.0, 10.0, (19,), float32))])", "_shape": null, "dtype": null, "_np_random": null}, "action_space": {":type:": "<class 'gymnasium.spaces.box.Box'>", ":serialized:": "gAWVagIAAAAAAACMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMDWJvdW5kZWRfYmVsb3eUjBJudW1weS5jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWBAAAAAAAAAABAQEBlGgIjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwSFlIwBQ5R0lFKUjA1ib3VuZGVkX2Fib3ZllGgRKJYEAAAAAAAAAAEBAQGUaBVLBIWUaBl0lFKUjAZfc2hhcGWUSwSFlIwDbG93lGgRKJYQAAAAAAAAAAAAgL8AAIC/AACAvwAAgL+UaAtLBIWUaBl0lFKUjARoaWdolGgRKJYQAAAAAAAAAAAAgD8AAIA/AACAPwAAgD+UaAtLBIWUaBl0lFKUjAhsb3dfcmVwcpSMBC0xLjCUjAloaWdoX3JlcHKUjAMxLjCUjApfbnBfcmFuZG9tlIwUbnVtcHkucmFuZG9tLl9waWNrbGWUjBBfX2dlbmVyYXRvcl9jdG9ylJOUjAVQQ0c2NJRoMowUX19iaXRfZ2VuZXJhdG9yX2N0b3KUk5SGlFKUfZQojA1iaXRfZ2VuZXJhdG9ylIwFUENHNjSUjAVzdGF0ZZR9lChoPYoQtENnGRUnDXyiNjutGXCENYwDaW5jlIoQISC+4Z/tETh6j9bkQPZWZ3WMCmhhc191aW50MzKUSwCMCHVpbnRlZ2VylEsAdWJ1Yi4=", "dtype": "float32", "bounded_below": "[ True True True True]", "bounded_above": "[ True True True True]", "_shape": [4], "low": "[-1. -1. -1. -1.]", "high": "[1. 1. 1. 
1.]", "low_repr": "-1.0", "high_repr": "1.0", "_np_random": "Generator(PCG64)"}, "n_envs": 4, "buffer_size": 1000000, "batch_size": 2048, "learning_starts": 1000, "tau": 0.05, "gamma": 0.95, "gradient_steps": 1, "optimize_memory_usage": false, "replay_buffer_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVPwAAAAAAAACMJ3N0YWJsZV9iYXNlbGluZXMzLmhlci5oZXJfcmVwbGF5X2J1ZmZlcpSMD0hlclJlcGxheUJ1ZmZlcpSTlC4=", "__module__": "stable_baselines3.her.her_replay_buffer", "__annotations__": "{'env': typing.Optional[stable_baselines3.common.vec_env.base_vec_env.VecEnv]}", "__doc__": "\n Hindsight Experience Replay (HER) buffer.\n Paper: https://arxiv.org/abs/1707.01495\n\n Replay buffer for sampling HER (Hindsight Experience Replay) transitions.\n\n .. note::\n\n Compared to other implementations, the ``future`` goal sampling strategy is inclusive:\n the current transition can be used when re-sampling.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param env: The training environment\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n Disabled for now (see https://github.com/DLR-RM/stable-baselines3/pull/243#discussion_r531535702)\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n :param n_sampled_goal: Number of virtual transitions to create per real transition,\n by sampling new goals.\n :param goal_selection_strategy: Strategy for sampling goals for replay.\n One of ['episode', 'final', 'future']\n :param copy_info_dict: Whether to copy the info dictionary and pass it to\n ``compute_reward()`` method.\n Please note that the copy may cause a slowdown.\n False by default.\n ", "__init__": "<function HerReplayBuffer.__init__ 
at 0x715a77fe5e10>", "__getstate__": "<function HerReplayBuffer.__getstate__ at 0x715a77fe5ea0>", "__setstate__": "<function HerReplayBuffer.__setstate__ at 0x715a77fe5f30>", "set_env": "<function HerReplayBuffer.set_env at 0x715a77fe5fc0>", "add": "<function HerReplayBuffer.add at 0x715a77fe6050>", "_compute_episode_length": "<function HerReplayBuffer._compute_episode_length at 0x715a77fe60e0>", "sample": "<function HerReplayBuffer.sample at 0x715a77fe6170>", "_get_real_samples": "<function HerReplayBuffer._get_real_samples at 0x715a77fe6200>", "_get_virtual_samples": "<function HerReplayBuffer._get_virtual_samples at 0x715a77fe6290>", "_sample_goals": "<function HerReplayBuffer._sample_goals at 0x715a77fe6320>", "truncate_last_trajectory": "<function HerReplayBuffer.truncate_last_trajectory at 0x715a77fe63b0>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x715a77ff8740>"}, "replay_buffer_kwargs": {}, "train_freq": {":type:": "<class 'stable_baselines3.common.type_aliases.TrainFreq'>", ":serialized:": "gAWVYQAAAAAAAACMJXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi50eXBlX2FsaWFzZXOUjAlUcmFpbkZyZXGUk5RLAWgAjBJUcmFpbkZyZXF1ZW5jeVVuaXSUk5SMBHN0ZXCUhZRSlIaUgZQu"}, "use_sde_at_warmup": false, "target_entropy": -4.0, "ent_coef": "auto", "target_update_interval": 1, "top_quantiles_to_drop_per_net": 2, "lr_schedule": {":type:": "<class 'function'>", ":serialized:": 
"gAWVrQMAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLA0sTQwx0AIgAfACDAYMBUwCUToWUjAVmbG9hdJSFlIwScHJvZ3Jlc3NfcmVtYWluaW5nlIWUjF4vaG9tZS9sdXlhbmcvbWluaWNvbmRhMy9lbnZzL3JsL2xpYi9weXRob24zLjEwL3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwIPGxhbWJkYT6US2FDAgwAlIwOdmFsdWVfc2NoZWR1bGWUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flIxeL2hvbWUvbHV5YW5nL21pbmljb25kYTMvZW52cy9ybC9saWIvcHl0aG9uMy4xMC9zaXRlLXBhY2thZ2VzL3N0YWJsZV9iYXNlbGluZXMzL2NvbW1vbi91dGlscy5weZR1Tk5oAIwQX21ha2VfZW1wdHlfY2VsbJSTlClSlIWUdJRSlGgAjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoIX2UfZQoaBhoD4wMX19xdWFsbmFtZV9flIwhZ2V0X3NjaGVkdWxlX2ZuLjxsb2NhbHM+LjxsYW1iZGE+lIwPX19hbm5vdGF0aW9uc19flH2UjA5fX2t3ZGVmYXVsdHNfX5ROjAxfX2RlZmF1bHRzX1+UTowKX19tb2R1bGVfX5RoGYwHX19kb2NfX5ROjAtfX2Nsb3N1cmVfX5RoAIwKX21ha2VfY2VsbJSTlGgCKGgHKEsBSwBLAEsBSwFLE0MEiABTAJRoCSmMAV+UhZRoDowEZnVuY5RLhUMCBAGUjAN2YWyUhZQpdJRSlGgVTk5oHSlSlIWUdJRSlGgjaD19lH2UKGgYaDRoJowZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5RoKH2UaCpOaCtOaCxoGWgtTmguaDBHP1BiTdLxqfyFlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMIWUUpSFlGhFXZRoR32UdYaUhlIwLg=="}, "batch_norm_stats": [], "batch_norm_stats_target": [], "system_info": {"OS": "Linux-6.8.0-41-generic-x86_64-with-glibc2.39 # 41-Ubuntu SMP PREEMPT_DYNAMIC Fri Aug 2 20:41:06 UTC 2024", "Python": "3.10.12", "Stable-Baselines3": "2.3.2", "PyTorch": "2.4.1+cu121", "GPU Enabled": "True", "Numpy": "1.26.4", "Cloudpickle": "3.0.0", "Gymnasium": "0.29.1", "OpenAI Gym": "0.26.2"}}
replay.mp4 CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
 
results.json CHANGED
@@ -1 +1 @@
1
- {"mean_reward": -26.6, "std_reward": 19.925862591115095, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2024-09-21T02:01:48.617099"}
 
1
+ {"mean_reward": -5.8, "std_reward": 3.1559467676119, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2024-09-21T17:58:48.764628"}
tqc-PandaPickAndPlace-v3.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:909ae229ec260dc8c868bee620788bae77bb67ade2319fd04e05ee3bb1e0dda2
3
+ size 24284513
tqc-PandaPickAndPlace-v3/_stable_baselines3_version ADDED
@@ -0,0 +1 @@
 
 
1
+ 2.3.2
tqc-PandaPickAndPlace-v3/actor.optimizer.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5744f49136384ea5c4ff55b68e05e9185dbd54691221a23b8448c60a8bbcc3bb
3
+ size 4350332
tqc-PandaPickAndPlace-v3/critic.optimizer.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4010bd348f5da00c9a24be95bbec9d64c921f9b9e334ff0fcaed097766cafd95
3
+ size 8869382
tqc-PandaPickAndPlace-v3/data ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "policy_class": {
3
+ ":type:": "<class 'abc.ABCMeta'>",
4
+ ":serialized:": "gAWVMQAAAAAAAACMGHNiM19jb250cmliLnRxYy5wb2xpY2llc5SMEE11bHRpSW5wdXRQb2xpY3mUk5Qu",
5
+ "__module__": "sb3_contrib.tqc.policies",
6
+ "__doc__": "\n Policy class (with both actor and critic) for TQC.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param use_expln: Use ``expln()`` function instead of ``exp()`` when using gSDE to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param clip_mean: Clip the mean output when using gSDE to avoid numerical instability.\n :param features_extractor_class: Features extractor to use.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n :param n_quantiles: Number of quantiles for the critic.\n :param n_critics: Number of critic networks to create.\n :param share_features_extractor: Whether to share or not the features extractor\n between the actor and the critic (this saves computation time)\n ",
7
+ "__init__": "<function MultiInputPolicy.__init__ at 0x715a28a5b0a0>",
8
+ "__abstractmethods__": "frozenset()",
9
+ "_abc_impl": "<_abc._abc_data object at 0x715a28a57d00>"
10
+ },
11
+ "verbose": 1,
12
+ "policy_kwargs": {
13
+ "net_arch": [
14
+ 512,
15
+ 512,
16
+ 512
17
+ ],
18
+ "n_critics": 2,
19
+ "use_sde": false
20
+ },
21
+ "num_timesteps": 10000000,
22
+ "_total_timesteps": 10000000,
23
+ "_num_timesteps_at_start": 0,
24
+ "seed": null,
25
+ "action_noise": null,
26
+ "start_time": 1726855976903048469,
27
+ "learning_rate": 0.001,
28
+ "tensorboard_log": null,
29
+ "_last_obs": null,
30
+ "_last_episode_starts": {
31
+ ":type:": "<class 'numpy.ndarray'>",
32
+ ":serialized:": "gAWVdwAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYEAAAAAAAAAAEBAQGUjAVudW1weZSMBWR0eXBllJOUjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwSFlIwBQ5R0lFKULg=="
33
+ },
34
+ "_last_original_obs": {
35
+ ":type:": "<class 'collections.OrderedDict'>",
36
+ ":serialized:": "gAWViwIAAAAAAACMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojA1hY2hpZXZlZF9nb2FslIwSbnVtcHkuY29yZS5udW1lcmljlIwLX2Zyb21idWZmZXKUk5QoljAAAAAAAAAAgnwcPUypq73iwKM8hNxrOrTny70WUqk8Qg2sPEXVAr4K16M8c3aevGtyGb4VwaM8lIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksESwOGlIwBQ5R0lFKUjAxkZXNpcmVkX2dvYWyUaAcoljAAAAAAAAAAEirrO+DXEb1CVjI+1b8hPQkvCj5Nn1w+s9PzPUxp+D20CqU9PGVBPRsKpr1XCho+lGgOSwRLA4aUaBJ0lFKUjAtvYnNlcnZhdGlvbpRoByiWMAEAAAAAAACOp3Q9j7CPvfoc9z0qbm2+4nccvyvnq78oc38ygnwcPUypq73iwKM8QJaWNls4Fbi07gG3yB/ZtTjGNzcPioC1YjkjMmrPSLjPn8q400SQO5FLpr1F5KI7lPMivP+93j2pwwW+aGKjPYTcazq058u9FlKpPLKJDT074484f4JbveD3aLvB+5I+SE+GPRcPWUCdiSO+YX/TwOpyHT0ZbBqsQyNKPgAAAAAAAACAAAAAAAAAAABCDaw8RdUCvgrXozwAAAAAAAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJmLU84xzLvDhgKj4cgVG/w4idv8GQhb+IdpEyc3aevGtyGb4VwaM85o2WNlpZAbj8JYK20o7Atm7GNzdse6q2MnVbMw4rhblyy8q4lGgOSwRLE4aUaBJ0lFKUdS4=",
37
+ "achieved_goal": "[[ 0.03820468 -0.083819 0.01998943]\n [ 0.00089974 -0.09956303 0.02066902]\n [ 0.02100242 -0.12776668 0.02 ]\n [-0.01934359 -0.14985053 0.01998953]]",
38
+ "desired_goal": "[[ 0.00717665 -0.03560627 0.17415717]\n [ 0.03948959 0.13494505 0.21545143]\n [ 0.11905613 0.12129459 0.08058682]\n [ 0.04721569 -0.08107396 0.15043007]]",
39
+ "observation": "[[ 5.97301051e-02 -7.01609775e-02 1.20660737e-01 -2.31865555e-01\n -6.11204267e-01 -1.34299219e+00 1.48691370e-08 3.82046774e-02\n -8.38190019e-02 1.99894346e-02 4.48783976e-06 -3.55768534e-05\n -7.74457658e-06 -1.61770095e-06 1.09538014e-05 -9.57692350e-07\n 9.50089607e-09 -4.78768852e-05 -9.66187727e-05]\n [ 4.40273574e-03 -8.11988190e-02 4.97105950e-03 -9.94576886e-03\n 1.08760826e-01 -1.30629197e-01 7.97775388e-02 8.99739796e-04\n -9.95630324e-02 2.06690244e-02 3.45551446e-02 6.86109634e-05\n -5.35912476e-02 -3.55481356e-03 2.87076980e-01 6.55809045e-02\n 3.39154601e+00 -1.59704641e-01 -6.60929918e+00]\n [ 3.84396687e-02 -2.19447225e-12 1.97400138e-01 0.00000000e+00\n -0.00000000e+00 0.00000000e+00 0.00000000e+00 2.10024156e-02\n -1.27766684e-01 1.99999996e-02 0.00000000e+00 -0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00\n 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n [ 2.21672226e-02 -2.47940477e-02 1.66382670e-01 -8.18376303e-01\n -1.23073614e+00 -1.04348004e+00 1.69341234e-08 -1.93435904e-02\n -1.49850532e-01 1.99895296e-02 4.48686751e-06 -3.08392118e-05\n -3.87872387e-06 -5.73867237e-06 1.09538505e-05 -5.08076300e-06\n 5.10964995e-08 -2.53998151e-04 -9.67000524e-05]]"
40
+ },
41
+ "_episode_num": 1178047,
42
+ "use_sde": false,
43
+ "sde_sample_freq": -1,
44
+ "_current_progress_remaining": 0.0,
45
+ "_stats_window_size": 100,
46
+ "ep_info_buffer": {
47
+ ":type:": "<class 'collections.deque'>",
48
+ ":serialized:": "gAWV4AsAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwCAAAAAAAACMAWyUSwmMAXSUR0DrtVd5KODKdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtVWi9IwudX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtV35s0pFdX2UKGgGR8AuAAAAAAAAaAdLEGgIR0DrtVX67dzodX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtVyHi3ocdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtVsxPfsNdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtVk84giedX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtWKW5YozdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtWG0qpcYdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtWCQpWmxdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtWgXizcAdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtWYDSPU8dX2UKGgGR8AkAAAAAAAAaAdLC2gIR0DrtWDRRdhRdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtWVe5WildX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtWwwdsBRdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtWlqVQhwdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtWQzImw8dX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtWlvUjLTdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtXEAlOXWdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtW47FsHjdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtWmfzSThdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtXL2exwAdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtXaQlKK6dX2UKGgGR8AkAAAAAAAAaAdLC2gIR0DrtXEdHUc5dX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtW/WXC0odX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtXk0dilSdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtXafgaWHdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtXzEH+qBdX2UKGgGR8AiAAAAAAAAaAdLCmgIR0DrtXaVclgMdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtYJgzguRdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtX0LXtjTdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtXp8HfMwdX2UKGgGR8AkAAAAAAAAaAdLC2gIR0DrtYELEUCadX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtYZ7tzCDdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtYUToMa1dX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtYMmb9ZSdX2UKGgGR8AkAAAAAAAAaAdLC2gIR0DrtYHJTVDsdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtYyXgtOEdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtYfegte2dX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtYX0vGp/dX2UKGgGR8AiAAAAAAAAaAdLCmgIR0DrtYvh4t6HdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtZCU4aP0dX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtYvDm8ujdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtYp+tr9EdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtZHDIBBBdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtY/NZ/0/dX2UKGgGR8AiAAAAAAAAaAdLCmgIR0DrtZdIOH32dX2UKGgGR8AIAAAAAAAAaAdLBGgIR0DrtZR3e
N1hdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtY8++M6zdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtZUbayrxdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtZQB3A2ydX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtZzWFvhqdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtZoR8twrdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtZd4DcM3dX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtZrhLoOhdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtZ+wwCbMdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtaMzdDYzdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtZx5cC5mdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtaBrqt5ldX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtaO7UXpGdX2UKGgGR8AIAAAAAAAAaAdLBGgIR0DrtZ8bnX/YdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtaiAXEZSdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtaRXbM5fdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtaMW5YozdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtakS0Sh8dX2UKGgGR8AUAAAAAAAAaAdLBmgIR0Drtah8Ti84dX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtaaipvP1dX2UKGgGR8AiAAAAAAAAaAdLCmgIR0Drta9w3o9tdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtaytU4rCdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtbKlnAZbdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0Drta4La24NdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0Drtat2ZApsdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtbFrZ8KHdX2UKGgGR8AIAAAAAAAAaAdLBGgIR0DrtbClVtGedX2UKGgGR8AUAAAAAAAAaAdLBmgIR0Drta9vjOs1dX2UKGgGR8AcAAAAAAAAaAdLCGgIR0Drtbg50bLmdX2UKGgGR8AiAAAAAAAAaAdLCmgIR0Drtbgmqo60dX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtbRs41gqdX2UKGgGR8AiAAAAAAAAaAdLCmgIR0Drtbe6ClJpdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0Drtb3kFOfvdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0Drtb5mg8KYdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtbvR0EHMdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtcIB6KLsdX2UKGgGR8AiAAAAAAAAaAdLCmgIR0DrtbtAGB4EdX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtcHE2HcldX2UKGgGR8AQAAAAAAAAaAdLBWgIR0DrtcVTOPeYdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtcEvUz9CdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0Drtb/mwqy4dX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtcaQKa5PdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtcYMBp6AdX2UKGgGR8AiAAAAAAAAaAdLCmgIR0DrtcxDaXa8dX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtcTnanJldX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtczqL0jDdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtdIUvf0mdX2UKGgGR8AgAAAAAAAAaAdLCWgIR0DrtcylvZRLdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0Drtcq2l2vCdX2UKGgGR8AYAAAAAAAAaAdLB2gIR0DrtdH5IH1OdX2UKGgGR8AUAAAAAAAAaAdLBmgIR0DrtdYmG
dqddX2UKGgGR8AUAAAAAAAAaAdLBmgIR0Drtc7UhmoSdX2UKGgGR8AcAAAAAAAAaAdLCGgIR0DrtdIWmgrZdWUu"
49
+ },
50
+ "ep_success_buffer": {
51
+ ":type:": "<class 'collections.deque'>",
52
+ ":serialized:": "gAWVhgAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIhlLg=="
53
+ },
54
+ "_n_updates": 2499750,
55
+ "observation_space": {
56
+ ":type:": "<class 'gymnasium.spaces.dict.Dict'>",
57
+ ":serialized:": "gAWVMgQAAAAAAACMFWd5bW5hc2l1bS5zcGFjZXMuZGljdJSMBERpY3SUk5QpgZR9lCiMBnNwYWNlc5SMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojA1hY2hpZXZlZF9nb2FslIwUZ3ltbmFzaXVtLnNwYWNlcy5ib3iUjANCb3iUk5QpgZR9lCiMBWR0eXBllIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYowNYm91bmRlZF9iZWxvd5SMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYDAAAAAAAAAAEBAZRoE4wCYjGUiYiHlFKUKEsDjAF8lE5OTkr/////Sv////9LAHSUYksDhZSMAUOUdJRSlIwNYm91bmRlZF9hYm92ZZRoHCiWAwAAAAAAAAABAQGUaCBLA4WUaCR0lFKUjAZfc2hhcGWUSwOFlIwDbG93lGgcKJYMAAAAAAAAAAAAIMEAACDBAAAgwZRoFksDhZRoJHSUUpSMBGhpZ2iUaBwolgwAAAAAAAAAAAAgQQAAIEEAACBBlGgWSwOFlGgkdJRSlIwIbG93X3JlcHKUjAUtMTAuMJSMCWhpZ2hfcmVwcpSMBDEwLjCUjApfbnBfcmFuZG9tlE51YowMZGVzaXJlZF9nb2FslGgNKYGUfZQoaBBoFmgZaBwolgMAAAAAAAAAAQEBlGggSwOFlGgkdJRSlGgnaBwolgMAAAAAAAAAAQEBlGggSwOFlGgkdJRSlGgsSwOFlGguaBwolgwAAAAAAAAAAAAgwQAAIMEAACDBlGgWSwOFlGgkdJRSlGgzaBwolgwAAAAAAAAAAAAgQQAAIEEAACBBlGgWSwOFlGgkdJRSlGg4jAUtMTAuMJRoOowEMTAuMJRoPE51YowLb2JzZXJ2YXRpb26UaA0pgZR9lChoEGgWaBloHCiWEwAAAAAAAAABAQEBAQEBAQEBAQEBAQEBAQEBlGggSxOFlGgkdJRSlGgnaBwolhMAAAAAAAAAAQEBAQEBAQEBAQEBAQEBAQEBAZRoIEsThZRoJHSUUpRoLEsThZRoLmgcKJZMAAAAAAAAAAAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMGUaBZLE4WUaCR0lFKUaDNoHCiWTAAAAAAAAAAAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBlGgWSxOFlGgkdJRSlGg4jAUtMTAuMJRoOowEMTAuMJRoPE51YnVoLE5oEE5oPE51Yi4=",
58
+ "spaces": "OrderedDict([('achieved_goal', Box(-10.0, 10.0, (3,), float32)), ('desired_goal', Box(-10.0, 10.0, (3,), float32)), ('observation', Box(-10.0, 10.0, (19,), float32))])",
59
+ "_shape": null,
60
+ "dtype": null,
61
+ "_np_random": null
62
+ },
63
+ "action_space": {
64
+ ":type:": "<class 'gymnasium.spaces.box.Box'>",
65
+ ":serialized:": "gAWVagIAAAAAAACMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMDWJvdW5kZWRfYmVsb3eUjBJudW1weS5jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWBAAAAAAAAAABAQEBlGgIjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwSFlIwBQ5R0lFKUjA1ib3VuZGVkX2Fib3ZllGgRKJYEAAAAAAAAAAEBAQGUaBVLBIWUaBl0lFKUjAZfc2hhcGWUSwSFlIwDbG93lGgRKJYQAAAAAAAAAAAAgL8AAIC/AACAvwAAgL+UaAtLBIWUaBl0lFKUjARoaWdolGgRKJYQAAAAAAAAAAAAgD8AAIA/AACAPwAAgD+UaAtLBIWUaBl0lFKUjAhsb3dfcmVwcpSMBC0xLjCUjAloaWdoX3JlcHKUjAMxLjCUjApfbnBfcmFuZG9tlIwUbnVtcHkucmFuZG9tLl9waWNrbGWUjBBfX2dlbmVyYXRvcl9jdG9ylJOUjAVQQ0c2NJRoMowUX19iaXRfZ2VuZXJhdG9yX2N0b3KUk5SGlFKUfZQojA1iaXRfZ2VuZXJhdG9ylIwFUENHNjSUjAVzdGF0ZZR9lChoPYoQtENnGRUnDXyiNjutGXCENYwDaW5jlIoQISC+4Z/tETh6j9bkQPZWZ3WMCmhhc191aW50MzKUSwCMCHVpbnRlZ2VylEsAdWJ1Yi4=",
66
+ "dtype": "float32",
67
+ "bounded_below": "[ True True True True]",
68
+ "bounded_above": "[ True True True True]",
69
+ "_shape": [
70
+ 4
71
+ ],
72
+ "low": "[-1. -1. -1. -1.]",
73
+ "high": "[1. 1. 1. 1.]",
74
+ "low_repr": "-1.0",
75
+ "high_repr": "1.0",
76
+ "_np_random": "Generator(PCG64)"
77
+ },
78
+ "n_envs": 4,
79
+ "buffer_size": 1000000,
80
+ "batch_size": 2048,
81
+ "learning_starts": 1000,
82
+ "tau": 0.05,
83
+ "gamma": 0.95,
84
+ "gradient_steps": 1,
85
+ "optimize_memory_usage": false,
86
+ "replay_buffer_class": {
87
+ ":type:": "<class 'abc.ABCMeta'>",
88
+ ":serialized:": "gAWVPwAAAAAAAACMJ3N0YWJsZV9iYXNlbGluZXMzLmhlci5oZXJfcmVwbGF5X2J1ZmZlcpSMD0hlclJlcGxheUJ1ZmZlcpSTlC4=",
89
+ "__module__": "stable_baselines3.her.her_replay_buffer",
90
+ "__annotations__": "{'env': typing.Optional[stable_baselines3.common.vec_env.base_vec_env.VecEnv]}",
91
+ "__doc__": "\n Hindsight Experience Replay (HER) buffer.\n Paper: https://arxiv.org/abs/1707.01495\n\n Replay buffer for sampling HER (Hindsight Experience Replay) transitions.\n\n .. note::\n\n Compared to other implementations, the ``future`` goal sampling strategy is inclusive:\n the current transition can be used when re-sampling.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param env: The training environment\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n Disabled for now (see https://github.com/DLR-RM/stable-baselines3/pull/243#discussion_r531535702)\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n :param n_sampled_goal: Number of virtual transitions to create per real transition,\n by sampling new goals.\n :param goal_selection_strategy: Strategy for sampling goals for replay.\n One of ['episode', 'final', 'future']\n :param copy_info_dict: Whether to copy the info dictionary and pass it to\n ``compute_reward()`` method.\n Please note that the copy may cause a slowdown.\n False by default.\n ",
92
+ "__init__": "<function HerReplayBuffer.__init__ at 0x715a77fe5e10>",
93
+ "__getstate__": "<function HerReplayBuffer.__getstate__ at 0x715a77fe5ea0>",
94
+ "__setstate__": "<function HerReplayBuffer.__setstate__ at 0x715a77fe5f30>",
95
+ "set_env": "<function HerReplayBuffer.set_env at 0x715a77fe5fc0>",
96
+ "add": "<function HerReplayBuffer.add at 0x715a77fe6050>",
97
+ "_compute_episode_length": "<function HerReplayBuffer._compute_episode_length at 0x715a77fe60e0>",
98
+ "sample": "<function HerReplayBuffer.sample at 0x715a77fe6170>",
99
+ "_get_real_samples": "<function HerReplayBuffer._get_real_samples at 0x715a77fe6200>",
100
+ "_get_virtual_samples": "<function HerReplayBuffer._get_virtual_samples at 0x715a77fe6290>",
101
+ "_sample_goals": "<function HerReplayBuffer._sample_goals at 0x715a77fe6320>",
102
+ "truncate_last_trajectory": "<function HerReplayBuffer.truncate_last_trajectory at 0x715a77fe63b0>",
103
+ "__abstractmethods__": "frozenset()",
104
+ "_abc_impl": "<_abc._abc_data object at 0x715a77ff8740>"
105
+ },
106
+ "replay_buffer_kwargs": {},
107
+ "train_freq": {
108
+ ":type:": "<class 'stable_baselines3.common.type_aliases.TrainFreq'>",
109
+ ":serialized:": "gAWVYQAAAAAAAACMJXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi50eXBlX2FsaWFzZXOUjAlUcmFpbkZyZXGUk5RLAWgAjBJUcmFpbkZyZXF1ZW5jeVVuaXSUk5SMBHN0ZXCUhZRSlIaUgZQu"
110
+ },
111
+ "use_sde_at_warmup": false,
112
+ "target_entropy": -4.0,
113
+ "ent_coef": "auto",
114
+ "target_update_interval": 1,
115
+ "top_quantiles_to_drop_per_net": 2,
116
+ "lr_schedule": {
117
+ ":type:": "<class 'function'>",
118
+ ":serialized:": "gAWVrQMAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLA0sTQwx0AIgAfACDAYMBUwCUToWUjAVmbG9hdJSFlIwScHJvZ3Jlc3NfcmVtYWluaW5nlIWUjF4vaG9tZS9sdXlhbmcvbWluaWNvbmRhMy9lbnZzL3JsL2xpYi9weXRob24zLjEwL3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwIPGxhbWJkYT6US2FDAgwAlIwOdmFsdWVfc2NoZWR1bGWUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flIxeL2hvbWUvbHV5YW5nL21pbmljb25kYTMvZW52cy9ybC9saWIvcHl0aG9uMy4xMC9zaXRlLXBhY2thZ2VzL3N0YWJsZV9iYXNlbGluZXMzL2NvbW1vbi91dGlscy5weZR1Tk5oAIwQX21ha2VfZW1wdHlfY2VsbJSTlClSlIWUdJRSlGgAjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoIX2UfZQoaBhoD4wMX19xdWFsbmFtZV9flIwhZ2V0X3NjaGVkdWxlX2ZuLjxsb2NhbHM+LjxsYW1iZGE+lIwPX19hbm5vdGF0aW9uc19flH2UjA5fX2t3ZGVmYXVsdHNfX5ROjAxfX2RlZmF1bHRzX1+UTowKX19tb2R1bGVfX5RoGYwHX19kb2NfX5ROjAtfX2Nsb3N1cmVfX5RoAIwKX21ha2VfY2VsbJSTlGgCKGgHKEsBSwBLAEsBSwFLE0MEiABTAJRoCSmMAV+UhZRoDowEZnVuY5RLhUMCBAGUjAN2YWyUhZQpdJRSlGgVTk5oHSlSlIWUdJRSlGgjaD19lH2UKGgYaDRoJowZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5RoKH2UaCpOaCtOaCxoGWgtTmguaDBHP1BiTdLxqfyFlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMIWUUpSFlGhFXZRoR32UdYaUhlIwLg=="
119
+ },
120
+ "batch_norm_stats": [],
121
+ "batch_norm_stats_target": []
122
+ }
tqc-PandaPickAndPlace-v3/ent_coef_optimizer.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:067d350706e5b1223e73293e65f71aa9a8b20bcffe1f1b3c63acb2fb92f6f1c9
3
+ size 1940
tqc-PandaPickAndPlace-v3/policy.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31029006489c0a3c22081e80bfa4dc7042926e5a203beab8360c23c1309fd783
3
+ size 11042200
tqc-PandaPickAndPlace-v3/pytorch_variables.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2de5e34c855aee4251dded1aa877d09fc5dbea1712d46a6a957a42dfe1b9db2d
3
+ size 1180
tqc-PandaPickAndPlace-v3/system_info.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ - OS: Linux-6.8.0-41-generic-x86_64-with-glibc2.39 # 41-Ubuntu SMP PREEMPT_DYNAMIC Fri Aug 2 20:41:06 UTC 2024
2
+ - Python: 3.10.12
3
+ - Stable-Baselines3: 2.3.2
4
+ - PyTorch: 2.4.1+cu121
5
+ - GPU Enabled: True
6
+ - Numpy: 1.26.4
7
+ - Cloudpickle: 3.0.0
8
+ - Gymnasium: 0.29.1
9
+ - OpenAI Gym: 0.26.2
vec_normalize.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:816d1f55d3a1e12c88477bcd8bfb7fe032ec98e9ae832f33759ea7f983a37b81
3
- size 3248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a6363ad8ad877804595688ed93bc99a62827a43399c4a4b85d41e5a57d5b7cb
3
+ size 3247