Update code
Browse files
README.md
CHANGED
@@ -32,7 +32,7 @@ from stable_baselines3.common.callbacks import EvalCallback
|
|
32 |
|
33 |
# Create the environment
|
34 |
env_id = "LunarLander-v2"
|
35 |
-
n_envs =
|
36 |
env = make_vec_env(env_id, n_envs=n_envs)
|
37 |
|
38 |
# Create the evaluation envs
|
@@ -58,14 +58,14 @@ model = DQN(
|
|
58 |
env,
|
59 |
learning_starts=0,
|
60 |
batch_size=128,
|
61 |
-
buffer_size=
|
62 |
-
learning_rate=
|
63 |
target_update_interval=250,
|
64 |
-
train_freq=
|
65 |
-
gradient_steps
|
66 |
-
# Explore for
|
67 |
-
exploration_fraction=0.
|
68 |
-
exploration_final_eps=0.
|
69 |
policy_kwargs=dict(net_arch=[256, 256]),
|
70 |
verbose=1,
|
71 |
)
|
|
|
32 |
|
33 |
# Create the environment
|
34 |
env_id = "LunarLander-v2"
|
35 |
+
n_envs = 8
|
36 |
env = make_vec_env(env_id, n_envs=n_envs)
|
37 |
|
38 |
# Create the evaluation envs
|
|
|
58 |
env,
|
59 |
learning_starts=0,
|
60 |
batch_size=128,
|
61 |
+
buffer_size=100000,
|
62 |
+
learning_rate=7e-4,
|
63 |
target_update_interval=250,
|
64 |
+
train_freq=1,
|
65 |
+
gradient_steps=4,
|
66 |
+
# Explore for 40_000 timesteps (exploration_fraction * total training timesteps)
|
67 |
+
exploration_fraction=0.08,
|
68 |
+
exploration_final_eps=0.05,
|
69 |
policy_kwargs=dict(net_arch=[256, 256]),
|
70 |
verbose=1,
|
71 |
)
|