SuperSecureHuman committed on
Commit
c857d52
·
unverified ·
1 Parent(s): a511f27

minor changes

Browse files
Files changed (1) hide show
  1. Main.ipynb +51 -87
Main.ipynb CHANGED
@@ -29,23 +29,29 @@
29
  {
30
  "cell_type": "code",
31
  "execution_count": null,
 
 
 
 
 
 
32
  "outputs": [],
33
  "source": [
34
  "config = {\n",
35
  " \"policy_type\": \"MlpPolicy\",\n",
36
  " \"env_name\": \"BipedalWalker-v3\",\n",
37
  "}"
38
- ],
39
- "metadata": {
40
- "collapsed": false,
41
- "pycharm": {
42
- "name": "#%%\n"
43
- }
44
- }
45
  },
46
  {
47
  "cell_type": "code",
48
  "execution_count": null,
 
 
 
 
 
 
49
  "outputs": [],
50
  "source": [
51
  "run = wandb.init(\n",
@@ -55,13 +61,7 @@
55
  " monitor_gym=True, # auto-upload the videos of agents playing the game\n",
56
  " save_code=True, # optional\n",
57
  ")"
58
- ],
59
- "metadata": {
60
- "collapsed": false,
61
- "pycharm": {
62
- "name": "#%%\n"
63
- }
64
- }
65
  },
66
  {
67
  "cell_type": "code",
@@ -80,10 +80,9 @@
80
  "source": [
81
  "import gym\n",
82
  "\n",
83
- "# First, we create our environment called LunarLander-v2\n",
84
  "env = gym.make(\"BipedalWalker-v3\")\n",
85
  "\n",
86
- "# Then we reset this environment\n",
87
  "observation = env.reset()\n",
88
  "\n",
89
  "for _ in range(200):\n",
@@ -92,7 +91,6 @@
92
  " print(\"Action taken:\", action)\n",
93
  " env.render()\n",
94
  "\n",
95
- "\n",
96
  " # Do this action in the environment and get\n",
97
  " # next_state, reward, done and info\n",
98
  " observation, reward, done, info = env.step(action)\n",
@@ -143,31 +141,31 @@
143
  {
144
  "cell_type": "code",
145
  "execution_count": null,
146
- "outputs": [],
147
- "source": [
148
- "eval_env = make_vec_env(\"BipedalWalker-v3\", n_envs=1)"
149
- ],
150
  "metadata": {
151
- "collapsed": false,
152
  "pycharm": {
153
  "name": "#%%\n"
154
  }
155
- }
 
 
 
 
156
  },
157
  {
158
  "cell_type": "code",
159
  "execution_count": null,
160
- "outputs": [],
161
- "source": [
162
- "callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=300, verbose=1)\n",
163
- "eval_callback = EvalCallback(eval_env, callback_on_new_best=callback_on_best, verbose=1)"
164
- ],
165
  "metadata": {
166
- "collapsed": false,
167
  "pycharm": {
168
  "name": "#%%\n"
169
  }
170
- }
 
 
 
 
 
171
  },
172
  {
173
  "cell_type": "code",
@@ -200,44 +198,44 @@
200
  {
201
  "cell_type": "code",
202
  "execution_count": null,
203
- "outputs": [],
204
- "source": [
205
- "env_id = 'BipedalWalker-v3'"
206
- ],
207
  "metadata": {
208
- "collapsed": false,
209
  "pycharm": {
210
  "name": "#%%\n"
211
  }
212
- }
 
 
 
 
213
  },
214
  {
215
  "cell_type": "code",
216
  "execution_count": null,
217
- "outputs": [],
218
- "source": [
219
- "model.learn(total_timesteps=50000000, callback=[WandbCallback() , eval_callback])"
220
- ],
221
  "metadata": {
222
- "collapsed": false,
223
  "pycharm": {
224
  "name": "#%%\n"
225
  }
226
- }
 
 
 
 
227
  },
228
  {
229
  "cell_type": "code",
230
  "execution_count": null,
231
- "outputs": [],
232
- "source": [
233
- "model.save('300-Trained.zip')"
234
- ],
235
  "metadata": {
236
- "collapsed": false,
237
  "pycharm": {
238
  "name": "#%%\n"
239
  }
240
- }
 
 
 
 
241
  },
242
  {
243
  "cell_type": "code",
@@ -278,18 +276,6 @@
278
  "eval_env.close()"
279
  ]
280
  },
281
- {
282
- "cell_type": "code",
283
- "execution_count": null,
284
- "id": "de40c367",
285
- "metadata": {
286
- "pycharm": {
287
- "name": "#%%\n"
288
- }
289
- },
290
- "outputs": [],
291
- "source": []
292
- },
293
  {
294
  "cell_type": "code",
295
  "execution_count": null,
@@ -313,48 +299,26 @@
313
  "\n",
314
  "from huggingface_sb3 import package_to_hub\n",
315
  "\n",
316
- "# PLACE the variables you've just defined two cells above\n",
317
- "# Define the name of the environment\n",
318
  "env_id = \"BipedalWalker-v3\"\n",
319
  "\n",
320
- "# TODO: Define the model architecture we used\n",
321
  "model_architecture = \"TD3\"\n",
322
  "model_name = \"TD3_BipedalWalker-v3\"\n",
323
  "\n",
324
- "## Define a repo_id\n",
325
- "## repo_id is the id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n",
326
- "## CHANGE WITH YOUR REPO ID\n",
327
  "repo_id = \"SuperSecureHuman/BipedalWalker-v3-TD3\"\n",
328
  "\n",
329
- "## Define the commit message\n",
330
  "commit_message = \"Upload score 300 trained bipedal walker\"\n",
331
  "\n",
332
- "# Create the evaluation env\n",
333
  "eval_env = DummyVecEnv([lambda: gym.make(env_id)])\n",
334
  "\n",
335
- "# PLACE the package_to_hub function you've just filled here\n",
336
  "package_to_hub(model=model, # Our trained model\n",
337
  " model_name=model_name, # The name of our trained model \n",
338
  " model_architecture=model_architecture, # The model architecture we used: in our case PPO\n",
339
  " env_id=env_id, # Name of the environment\n",
340
  " eval_env=eval_env, # Evaluation Environment\n",
341
- " repo_id=repo_id, # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n",
342
- " commit_message=commit_message)\n"
343
- ]
344
- },
345
- {
346
- "cell_type": "code",
347
- "execution_count": null,
348
- "outputs": [],
349
- "source": [
350
  "eval_env.close()"
351
- ],
352
- "metadata": {
353
- "collapsed": false,
354
- "pycharm": {
355
- "name": "#%%\n"
356
- }
357
- }
358
  }
359
  ],
360
  "metadata": {
@@ -373,7 +337,7 @@
373
  "name": "python",
374
  "nbconvert_exporter": "python",
375
  "pygments_lexer": "ipython3",
376
- "version": "3.7.12"
377
  },
378
  "toc": {
379
  "base_numbering": 1,
@@ -420,4 +384,4 @@
420
  },
421
  "nbformat": 4,
422
  "nbformat_minor": 5
423
- }
 
29
  {
30
  "cell_type": "code",
31
  "execution_count": null,
32
+ "id": "cc1d81f5",
33
+ "metadata": {
34
+ "pycharm": {
35
+ "name": "#%%\n"
36
+ }
37
+ },
38
  "outputs": [],
39
  "source": [
40
  "config = {\n",
41
  " \"policy_type\": \"MlpPolicy\",\n",
42
  " \"env_name\": \"BipedalWalker-v3\",\n",
43
  "}"
44
+ ]
 
 
 
 
 
 
45
  },
46
  {
47
  "cell_type": "code",
48
  "execution_count": null,
49
+ "id": "d9c45ab2",
50
+ "metadata": {
51
+ "pycharm": {
52
+ "name": "#%%\n"
53
+ }
54
+ },
55
  "outputs": [],
56
  "source": [
57
  "run = wandb.init(\n",
 
61
  " monitor_gym=True, # auto-upload the videos of agents playing the game\n",
62
  " save_code=True, # optional\n",
63
  ")"
64
+ ]
 
 
 
 
 
 
65
  },
66
  {
67
  "cell_type": "code",
 
80
  "source": [
81
  "import gym\n",
82
  "\n",
83
+ "\n",
84
  "env = gym.make(\"BipedalWalker-v3\")\n",
85
  "\n",
 
86
  "observation = env.reset()\n",
87
  "\n",
88
  "for _ in range(200):\n",
 
91
  " print(\"Action taken:\", action)\n",
92
  " env.render()\n",
93
  "\n",
 
94
  " # Do this action in the environment and get\n",
95
  " # next_state, reward, done and info\n",
96
  " observation, reward, done, info = env.step(action)\n",
 
141
  {
142
  "cell_type": "code",
143
  "execution_count": null,
144
+ "id": "7ca36c14",
 
 
 
145
  "metadata": {
 
146
  "pycharm": {
147
  "name": "#%%\n"
148
  }
149
+ },
150
+ "outputs": [],
151
+ "source": [
152
+ "eval_env = make_vec_env(\"BipedalWalker-v3\", n_envs=1)"
153
+ ]
154
  },
155
  {
156
  "cell_type": "code",
157
  "execution_count": null,
158
+ "id": "94fe286d",
 
 
 
 
159
  "metadata": {
 
160
  "pycharm": {
161
  "name": "#%%\n"
162
  }
163
+ },
164
+ "outputs": [],
165
+ "source": [
166
+ "callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=300, verbose=1)\n",
167
+ "eval_callback = EvalCallback(eval_env, callback_on_new_best=callback_on_best, verbose=1)"
168
+ ]
169
  },
170
  {
171
  "cell_type": "code",
 
198
  {
199
  "cell_type": "code",
200
  "execution_count": null,
201
+ "id": "65c99875",
 
 
 
202
  "metadata": {
 
203
  "pycharm": {
204
  "name": "#%%\n"
205
  }
206
+ },
207
+ "outputs": [],
208
+ "source": [
209
+ "env_id = 'BipedalWalker-v3'"
210
+ ]
211
  },
212
  {
213
  "cell_type": "code",
214
  "execution_count": null,
215
+ "id": "71b5ef7f",
 
 
 
216
  "metadata": {
 
217
  "pycharm": {
218
  "name": "#%%\n"
219
  }
220
+ },
221
+ "outputs": [],
222
+ "source": [
223
+ "model.learn(total_timesteps=50000000, callback=[WandbCallback() , eval_callback])"
224
+ ]
225
  },
226
  {
227
  "cell_type": "code",
228
  "execution_count": null,
229
+ "id": "b18e1309",
 
 
 
230
  "metadata": {
 
231
  "pycharm": {
232
  "name": "#%%\n"
233
  }
234
+ },
235
+ "outputs": [],
236
+ "source": [
237
+ "model.save('300-Trained.zip')"
238
+ ]
239
  },
240
  {
241
  "cell_type": "code",
 
276
  "eval_env.close()"
277
  ]
278
  },
 
 
 
 
 
 
 
 
 
 
 
 
279
  {
280
  "cell_type": "code",
281
  "execution_count": null,
 
299
  "\n",
300
  "from huggingface_sb3 import package_to_hub\n",
301
  "\n",
 
 
302
  "env_id = \"BipedalWalker-v3\"\n",
303
  "\n",
 
304
  "model_architecture = \"TD3\"\n",
305
  "model_name = \"TD3_BipedalWalker-v3\"\n",
306
  "\n",
 
 
 
307
  "repo_id = \"SuperSecureHuman/BipedalWalker-v3-TD3\"\n",
308
  "\n",
 
309
  "commit_message = \"Upload score 300 trained bipedal walker\"\n",
310
  "\n",
 
311
  "eval_env = DummyVecEnv([lambda: gym.make(env_id)])\n",
312
  "\n",
 
313
  "package_to_hub(model=model, # Our trained model\n",
314
  " model_name=model_name, # The name of our trained model \n",
315
  " model_architecture=model_architecture, # The model architecture we used: in our case PPO\n",
316
  " env_id=env_id, # Name of the environment\n",
317
  " eval_env=eval_env, # Evaluation Environment\n",
318
+ " repo_id=repo_id, # id of the model repository from the Hugging Face Hub\n",
319
+ " commit_message=commit_message)\n",
 
 
 
 
 
 
 
320
  "eval_env.close()"
321
+ ]
 
 
 
 
 
 
322
  }
323
  ],
324
  "metadata": {
 
337
  "name": "python",
338
  "nbconvert_exporter": "python",
339
  "pygments_lexer": "ipython3",
340
+ "version": "3.8.0"
341
  },
342
  "toc": {
343
  "base_numbering": 1,
 
384
  },
385
  "nbformat": 4,
386
  "nbformat_minor": 5
387
+ }