asuzuki committed on
Commit
1b6c68c
·
1 Parent(s): bb241e4

updated code

Browse files
Files changed (1) hide show
  1. README.md +48 -3
README.md CHANGED
@@ -30,8 +30,53 @@ TODO: Add your code
30
 
31
 
32
  ```python
33
- from stable_baselines3 import ...
34
- from huggingface_sb3 import load_from_hub
 
35
 
36
- ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  ```
 
30
 
31
 
32
  ```python
33
+ import pybullet_envs
34
+ import panda_gym
35
+ import gym
36
 
37
+ import os
38
+
39
+ from huggingface_sb3 import load_from_hub, package_to_hub
40
+
41
+ from stable_baselines3 import A2C
42
+ from stable_baselines3.common.evaluation import evaluate_policy
43
+ from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
44
+ from stable_baselines3.common.env_util import make_vec_env
45
+
46
+ from huggingface_hub import notebook_login
47
+
48
+ # Environment 1: AntBulletEnv-v0
49
+ env_id = "AntBulletEnv-v0"
50
+ # Create the env
51
+ env = gym.make(env_id)
52
+
53
+ env = make_vec_env(env_id, n_envs=4)
54
+
55
+ # Adding this wrapper to normalize the observation and the reward
56
+ env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10)
57
+
58
+ # Create the A2C model
59
+ model = A2C(policy = "MlpPolicy",
60
+ env = env,
61
+ gae_lambda = 0.9,
62
+ gamma = 0.99,
63
+ learning_rate = 0.00096,
64
+ max_grad_norm = 0.5,
65
+ n_steps = 8,
66
+ vf_coef = 0.4,
67
+ ent_coef = 0.0,
68
+ seed=11,
69
+ policy_kwargs=dict(
70
+ log_std_init=-2, ortho_init=False),
71
+ normalize_advantage=False,
72
+ use_rms_prop= True,
73
+ use_sde= True,
74
+ verbose=1)
75
+
76
+ # Train the agent
77
+ model.learn(1_500_000)
78
+
79
+ # Save the trained model together with the VecNormalize statistics
80
+ model.save("a2c-AntBulletEnv-v0")
81
+ env.save("vec_normalize.pkl")
82
  ```