{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "md-title",
   "metadata": {},
   "source": [
    "# TD3 on BipedalWalker-v3\n",
    "\n",
    "Trains a Stable-Baselines3 TD3 agent on `BipedalWalker-v3`, logs the run to Weights & Biases, evaluates a saved checkpoint, and uploads it to the Hugging Face Hub."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f3f1b89",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-05-06T15:35:55.593757Z",
     "start_time": "2022-05-06T15:35:54.206954Z"
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "import gym\n",
    "\n",
    "from stable_baselines3 import TD3\n",
    "from stable_baselines3.common.evaluation import evaluate_policy\n",
    "from stable_baselines3.common.env_util import make_vec_env\n",
    "\n",
    "import wandb\n",
    "from wandb.integration.sb3 import WandbCallback\n",
    "from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cc1d81f5",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Logged to W&B by wandb.init and read by the training cells below,\n",
    "# so the recorded config always matches what is actually trained.\n",
    "config = {\n",
    "    \"policy_type\": \"MlpPolicy\",\n",
    "    \"env_name\": \"BipedalWalker-v3\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d9c45ab2",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# `run` is closed with run.finish() once the trained model has been saved.\n",
    "run = wandb.init(\n",
    "    project=\"BiPedalWalker-v3\",\n",
    "    config=config,\n",
    "    sync_tensorboard=True,  # auto-upload sb3's tensorboard metrics\n",
    "    monitor_gym=True,  # auto-upload the videos of agents playing the game\n",
    "    save_code=True,  # optional\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-random-agent",
   "metadata": {},
   "source": [
    "## Sanity check: random agent\n",
    "\n",
    "Steps the environment with randomly sampled actions for 200 steps while rendering it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35ccb2df",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-05-06T11:52:04.640671Z",
     "start_time": "2022-05-06T11:52:00.907411Z"
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "import gym\n",
    "\n",
    "\n",
    "env = gym.make(\"BipedalWalker-v3\")\n",
    "\n",
    "observation = env.reset()\n",
    "\n",
    "for _ in range(200):\n",
    "    # Take a random action\n",
    "    action = env.action_space.sample()\n",
    "    print(\"Action taken:\", action)\n",
    "    env.render()\n",
    "\n",
    "    # Do this action in the environment and get\n",
    "    # next_state, reward, done and info\n",
    "    observation, reward, done, info = env.step(action)\n",
    "\n",
    "    # If the episode is over (the walker fell, reached the end of the\n",
    "    # terrain, or the time limit was hit), start a new one\n",
    "    if done:\n",
    "        # Reset the environment\n",
    "        print(\"Environment is reset\")\n",
    "        observation = env.reset()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b6a4ef9",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-05-06T11:52:07.357076Z",
     "start_time": "2022-05-06T11:52:07.349795Z"
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Release the environment used for the random rollout before `env` is reused below\n",
    "env.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-training",
   "metadata": {},
   "source": [
    "## Training\n",
    "\n",
    "Builds the vectorized training and evaluation environments, the evaluation callback, and the TD3 model, then trains and saves it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db2d1377",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-05-06T12:11:02.520195Z",
     "start_time": "2022-05-06T12:11:02.491149Z"
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Training environments: 32 copies of the environment named in `config`\n",
    "env = make_vec_env(config[\"env_name\"], n_envs=32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7ca36c14",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Separate single environment used only by EvalCallback during training\n",
    "eval_env = make_vec_env(config[\"env_name\"], n_envs=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "94fe286d",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# callback_on_best is invoked by EvalCallback whenever a new best evaluation\n",
    "# result is found; it stops training once the reward threshold (300) is reached.\n",
    "callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=300, verbose=1)\n",
    "eval_callback = EvalCallback(eval_env, callback_on_new_best=callback_on_best, verbose=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a774b23f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-05-06T12:18:14.514611Z",
     "start_time": "2022-05-06T12:18:14.497888Z"
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "model = TD3(\n",
    "    config[\"policy_type\"],  # same policy that is logged to W&B\n",
    "    env,\n",
    "    learning_rate=0.0001,\n",
    "    batch_size=128,\n",
    "    gamma=0.999,\n",
    "    train_freq=32,\n",
    "    gradient_steps=32,\n",
    "    tensorboard_log='model_log/',  # uploaded by W&B because sync_tensorboard=True\n",
    "    verbose=0\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65c99875",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "env_id = 'BipedalWalker-v3'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71b5ef7f",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Upper bound of 50M timesteps; eval_callback may stop training earlier\n",
    "# once the reward threshold is reached.\n",
    "model.learn(total_timesteps=50_000_000, callback=[WandbCallback(), eval_callback])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b18e1309",
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "model.save('300-Trained.zip')\n",
    "\n",
    "# Training is over: mark the W&B run as finished so all logged data is uploaded\n",
    "run.finish()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-evaluate",
   "metadata": {},
   "source": [
    "## Evaluate a saved checkpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e2e07af6",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-05-06T15:36:15.322985Z",
     "start_time": "2022-05-06T15:36:10.718319Z"
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# NOTE(review): this loads '30M_Trained.zip', not the '300-Trained.zip' written\n",
    "# by the save cell above - confirm this is the intended checkpoint and that the\n",
    "# file exists before running the notebook top to bottom.\n",
    "model = TD3.load('30M_Trained.zip')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "07d151f7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-05-06T15:36:41.652903Z",
     "start_time": "2022-05-06T15:36:22.118438Z"
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "eval_env = gym.make(\"BipedalWalker-v3\")\n",
    "try:\n",
    "    # Single rendered episode with deterministic actions\n",
    "    mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=1, deterministic=True, render=True)\n",
    "finally:\n",
    "    # Always release the environment (and its render window), even if evaluation fails\n",
    "    eval_env.close()\n",
    "print(f\"mean_reward={mean_reward:.2f} +/- {std_reward}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-upload",
   "metadata": {},
   "source": [
    "## Upload to the Hugging Face Hub"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e027a847",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2022-05-06T15:40:59.811143Z",
     "start_time": "2022-05-06T15:40:59.670690Z"
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "import gym\n",
    "\n",
    "from stable_baselines3 import PPO\n",
    "from stable_baselines3.common.vec_env import DummyVecEnv\n",
    "from stable_baselines3.common.env_util import make_vec_env\n",
    "\n",
    "from huggingface_sb3 import package_to_hub\n",
    "\n",
    "env_id = \"BipedalWalker-v3\"\n",
    "\n",
    "model_architecture = \"TD3\"\n",
    "model_name = \"TD3_BipedalWalker-v3\"\n",
    "\n",
    "repo_id = \"SuperSecureHuman/BipedalWalker-v3-TD3\"\n",
    "\n",
    "commit_message = \"Upload score 300 trained bipedal walker\"\n",
    "\n",
    "eval_env = DummyVecEnv([lambda: gym.make(env_id)])\n",
    "\n",
    "try:\n",
    "    package_to_hub(\n",
    "        model=model,  # Our trained model\n",
    "        model_name=model_name,  # The name of our trained model\n",
    "        model_architecture=model_architecture,  # The model architecture we used: in our case TD3\n",
    "        env_id=env_id,  # Name of the environment\n",
    "        eval_env=eval_env,  # Evaluation Environment\n",
    "        repo_id=repo_id,  # id of the model repository from the Hugging Face Hub\n",
    "        commit_message=commit_message,\n",
    "    )\n",
    "finally:\n",
    "    # Always release the evaluation environment, even if the upload fails\n",
    "    eval_env.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}