updated code
Browse files
README.md
CHANGED
@@ -30,8 +30,53 @@ TODO: Add your code
|
|
30 |
|
31 |
|
32 |
```python
|
33 |
-
|
34 |
-
|
|
|
35 |
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
```
|
|
|
30 |
|
31 |
|
32 |
```python
|
33 |
+
import pybullet_envs
|
34 |
+
import panda_gym
|
35 |
+
import gym
|
36 |
|
37 |
+
import os
|
38 |
+
|
39 |
+
from huggingface_sb3 import load_from_hub, package_to_hub
|
40 |
+
|
41 |
+
from stable_baselines3 import A2C
|
42 |
+
from stable_baselines3.common.evaluation import evaluate_policy
|
43 |
+
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
|
44 |
+
from stable_baselines3.common.env_util import make_vec_env
|
45 |
+
|
46 |
+
from huggingface_hub import notebook_login
|
47 |
+
|
48 |
+
# Environment 1: AntBulletEnv-v0
env_id = "AntBulletEnv-v0"

# Create the vectorized env (4 copies) directly from the env id.
# make_vec_env constructs its own environment instances, so a separate
# gym.make(env_id) instance is not needed: it would be overwritten here
# without ever being used or closed.
env = make_vec_env(env_id, n_envs=4)

# Wrap the env to normalize the observations and the rewards.
env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10)

# Create the A2C model.
model = A2C(
    policy="MlpPolicy",
    env=env,
    gae_lambda=0.9,
    gamma=0.99,
    learning_rate=0.00096,
    max_grad_norm=0.5,
    n_steps=8,
    vf_coef=0.4,
    ent_coef=0.0,
    seed=11,
    policy_kwargs=dict(log_std_init=-2, ortho_init=False),
    normalize_advantage=False,
    use_rms_prop=True,
    use_sde=True,
    verbose=1,
)

# Train the agent.
model.learn(1_500_000)

# Save the model and the VecNormalize statistics together: the saved policy
# was trained on normalized inputs, so the statistics are stored alongside it.
model.save("a2c-AntBulletEnv-v0")
env.save("vec_normalize.pkl")
|
82 |
```
|