{ "cells": [ { "cell_type": "markdown", "id": "8b7bcd57", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "## install dependencies and create a virtual screen" ] }, { "cell_type": "code", "execution_count": 1, "id": "92ca867e", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from pyvirtualdisplay import Display\n", "\n", "virtual_display = Display(visible=0, size=(1400, 900))\n", "virtual_display.start()" ] }, { "cell_type": "code", "execution_count": 2, "id": "ac38f664", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "import gym\n", "\n", "from huggingface_sb3 import load_from_hub, package_to_hub, push_to_hub\n", "from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.\n", "\n", "from stable_baselines3 import PPO\n", "from stable_baselines3.common.evaluation import evaluate_policy\n", "from stable_baselines3.common.env_util import make_vec_env" ] }, { "cell_type": "code", "execution_count": 3, "id": "f5969bb1", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Action taken: 1\n", "Action taken: 0\n", "Action taken: 1\n", "Action taken: 1\n", "Action taken: 1\n", "Action taken: 1\n", "Action taken: 3\n", "Action taken: 2\n", "Action taken: 3\n", "Action taken: 0\n", "Action taken: 2\n", "Action taken: 3\n", "Action taken: 2\n", "Action taken: 0\n", "Action taken: 3\n", "Action taken: 3\n", "Action taken: 2\n", "Action taken: 3\n", "Action taken: 2\n", "Action taken: 2\n" ] } ], "source": [ "import gym\n", "\n", "# First, we create our environment called LunarLander-v2\n", "env = gym.make(\"LunarLander-v2\")\n", "\n", "# Then we reset this environment\n", "observation = env.reset()\n", "\n", "for _ in range(20):\n", " # Take a random action\n", " action = env.action_space.sample()\n", " print(\"Action taken:\", action)\n", "\n", " # Do this action in the environment and get\n", " # next_state, reward, done and info\n", " observation, reward, done, info = env.step(action)\n", "\n", " # If the game is done (in our case we land, crashed or timeout)\n", " if done:\n", " # Reset the environment\n", " print(\"Environment is reset\")\n", " observation = env.reset()" ] }, { "cell_type": "code", "execution_count": 4, "id": "c76e00e1", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "_____OBSERVATION SPACE_____ \n", "\n", "Observation Space Shape (8,)\n", "Sample observation [-0.4549146 -1.0277667 -0.4214998 0.72812086 -1.4273707 0.23020315\n", " -0.03087789 0.7445605 ]\n" ] } ], "source": [ "# We create our environment with gym.make(\"\")\n", "env = gym.make(\"LunarLander-v2\")\n", "env.reset()\n", "print(\"_____OBSERVATION SPACE_____ \\n\")\n", "print(\"Observation Space Shape\", env.observation_space.shape)\n", "print(\"Sample observation\", env.observation_space.sample()) # Get a random observation" ] }, { "cell_type": "code", "execution_count": 5, "id": "ae259fbb", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " _____ACTION SPACE_____ \n", "\n", "Action Space Shape 4\n", "Action Space Sample 0\n" ] } ], "source": [ "print(\"\\n _____ACTION SPACE_____ \\n\")\n", "print(\"Action Space Shape\", env.action_space.n)\n", "print(\"Action Space Sample\", 
env.action_space.sample()) # Take a random action" ] }, { "cell_type": "code", "execution_count": 6, "id": "8434c59f", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# Create the environment\n", "env = make_vec_env('LunarLander-v2', n_envs=16)" ] }, { "cell_type": "code", "execution_count": 7, "id": "3ef072df", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Using cpu device\n" ] } ], "source": [ "# SOLUTION\n", "# We added some parameters to accelerate the training\n", "model = PPO(\n", " policy = 'MlpPolicy',\n", " env = env,\n", " n_steps = 1024,\n", " batch_size = 64,\n", " n_epochs = 4,\n", " gamma = 0.999,\n", " gae_lambda = 0.98,\n", " ent_coef = 0.01,\n", " verbose=1)" ] }, { "cell_type": "code", "execution_count": 10, "id": "10c8e90e", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "---------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 119 |\n", "| ep_rew_mean | -51.8 |\n", "| time/ | |\n", "| fps | 9037 |\n", "| iterations | 1 |\n", "| time_elapsed | 1 |\n", "| total_timesteps | 16384 |\n", "---------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 122 |\n", "| ep_rew_mean | -38.1 |\n", "| time/ | |\n", "| fps | 6271 |\n", "| iterations | 2 |\n", "| time_elapsed | 5 |\n", "| total_timesteps | 32768 |\n", "| train/ | |\n", "| approx_kl | 0.007412333 |\n", "| clip_fraction | 0.0399 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.27 |\n", "| explained_variance | -1.31e-06 |\n", "| learning_rate | 0.0003 |\n", "| loss | 133 |\n", "| n_updates | 32 |\n", "| policy_gradient_loss | -0.00488 |\n", "| value_loss | 293 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 136 |\n", "| ep_rew_mean | -21.8 |\n", "| time/ | |\n", "| fps | 5567 |\n", "| iterations | 3 |\n", "| time_elapsed | 8 |\n", "| total_timesteps | 49152 |\n", "| train/ | |\n", "| approx_kl | 0.0058661494 |\n", "| clip_fraction | 0.0297 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.23 |\n", "| explained_variance | 0.000282 |\n", "| learning_rate | 0.0003 |\n", "| loss | 166 |\n", "| n_updates | 36 |\n", "| policy_gradient_loss | -0.00286 |\n", "| value_loss | 352 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 161 |\n", "| ep_rew_mean | -23.2 |\n", "| time/ | |\n", "| fps | 5172 |\n", "| iterations | 4 |\n", "| time_elapsed | 12 |\n", "| total_timesteps | 65536 |\n", "| train/ | |\n", "| approx_kl | 0.011076563 |\n", "| clip_fraction | 0.0683 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.18 |\n", "| explained_variance | 0.000298 |\n", "| learning_rate | 0.0003 |\n", "| loss | 144 |\n", "| n_updates | 40 |\n", "| policy_gradient_loss | -0.00303 |\n", "| value_loss | 416 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 150 |\n", "| ep_rew_mean | -17.1 |\n", "| time/ | |\n", "| fps | 4823 |\n", "| iterations | 5 |\n", "| time_elapsed | 16 |\n", "| total_timesteps | 81920 |\n", "| train/ | |\n", "| approx_kl | 0.0068000476 |\n", "| clip_fraction | 0.0646 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.15 |\n", "| explained_variance | -0.0146 |\n", "| learning_rate | 0.0003 |\n", "| loss | 308 
|\n", "| n_updates | 44 |\n", "| policy_gradient_loss | -0.00208 |\n", "| value_loss | 579 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 174 |\n", "| ep_rew_mean | -5.02 |\n", "| time/ | |\n", "| fps | 4273 |\n", "| iterations | 6 |\n", "| time_elapsed | 23 |\n", "| total_timesteps | 98304 |\n", "| train/ | |\n", "| approx_kl | 0.005411272 |\n", "| clip_fraction | 0.0218 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.16 |\n", "| explained_variance | -0.000786 |\n", "| learning_rate | 0.0003 |\n", "| loss | 194 |\n", "| n_updates | 48 |\n", "| policy_gradient_loss | -0.000952 |\n", "| value_loss | 563 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 243 |\n", "| ep_rew_mean | 2.98 |\n", "| time/ | |\n", "| fps | 3655 |\n", "| iterations | 7 |\n", "| time_elapsed | 31 |\n", "| total_timesteps | 114688 |\n", "| train/ | |\n", "| approx_kl | 0.006326018 |\n", "| clip_fraction | 0.0391 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.15 |\n", "| explained_variance | 0.00313 |\n", "| learning_rate | 0.0003 |\n", "| loss | 223 |\n", "| n_updates | 52 |\n", "| policy_gradient_loss | -0.00191 |\n", "| value_loss | 520 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 345 |\n", "| ep_rew_mean | 8.38 |\n", "| time/ | |\n", "| fps | 3051 |\n", "| iterations | 8 |\n", "| time_elapsed | 42 |\n", "| total_timesteps | 131072 |\n", "| train/ | |\n", "| approx_kl | 0.0068710614 |\n", "| clip_fraction | 0.0534 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.13 |\n", "| explained_variance | -0.0101 |\n", "| learning_rate | 0.0003 |\n", "| loss | 143 |\n", "| n_updates | 56 |\n", "| policy_gradient_loss | -0.00258 |\n", "| value_loss | 515 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 439 |\n", "| ep_rew_mean | 18.6 |\n", "| time/ | |\n", "| fps | 2686 |\n", "| iterations | 9 |\n", "| time_elapsed | 54 |\n", "| total_timesteps | 147456 |\n", "| train/ | |\n", "| approx_kl | 0.0050993008 |\n", "| clip_fraction | 0.0221 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.14 |\n", "| explained_variance | 0.0406 |\n", "| learning_rate | 0.0003 |\n", "| loss | 179 |\n", "| n_updates | 60 |\n", "| policy_gradient_loss | -0.00245 |\n", "| value_loss | 328 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 530 |\n", "| ep_rew_mean | 25.5 |\n", "| time/ | |\n", "| fps | 2466 |\n", "| iterations | 10 |\n", "| time_elapsed | 66 |\n", "| total_timesteps | 163840 |\n", "| train/ | |\n", "| approx_kl | 0.004935194 |\n", "| clip_fraction | 0.0169 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.18 |\n", "| explained_variance | 0.189 |\n", "| learning_rate | 0.0003 |\n", "| loss | 121 |\n", "| n_updates | 64 |\n", "| policy_gradient_loss | -0.00138 |\n", "| value_loss | 304 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 625 |\n", "| ep_rew_mean | 34.5 |\n", "| time/ | |\n", "| fps | 2322 |\n", "| iterations | 11 |\n", "| time_elapsed | 77 |\n", "| total_timesteps | 180224 |\n", "| train/ | |\n", "| approx_kl | 0.005122483 |\n", "| clip_fraction | 0.0386 |\n", "| clip_range | 0.2 
|\n", "| entropy_loss | -1.14 |\n", "| explained_variance | 0.227 |\n", "| learning_rate | 0.0003 |\n", "| loss | 136 |\n", "| n_updates | 68 |\n", "| policy_gradient_loss | -0.00263 |\n", "| value_loss | 310 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 682 |\n", "| ep_rew_mean | 45.2 |\n", "| time/ | |\n", "| fps | 2166 |\n", "| iterations | 12 |\n", "| time_elapsed | 90 |\n", "| total_timesteps | 196608 |\n", "| train/ | |\n", "| approx_kl | 0.0049172407 |\n", "| clip_fraction | 0.0307 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.12 |\n", "| explained_variance | 0.22 |\n", "| learning_rate | 0.0003 |\n", "| loss | 202 |\n", "| n_updates | 72 |\n", "| policy_gradient_loss | -0.00163 |\n", "| value_loss | 392 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 725 |\n", "| ep_rew_mean | 45.5 |\n", "| time/ | |\n", "| fps | 2080 |\n", "| iterations | 13 |\n", "| time_elapsed | 102 |\n", "| total_timesteps | 212992 |\n", "| train/ | |\n", "| approx_kl | 0.0057060006 |\n", "| clip_fraction | 0.0515 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.12 |\n", "| explained_variance | 0.411 |\n", "| learning_rate | 0.0003 |\n", "| loss | 92 |\n", "| n_updates | 76 |\n", "| policy_gradient_loss | -0.00125 |\n", "| value_loss | 212 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 732 |\n", "| ep_rew_mean | 47.6 |\n", "| time/ | |\n", "| fps | 1958 |\n", "| iterations | 14 |\n", "| time_elapsed | 117 |\n", "| total_timesteps | 229376 |\n", "| train/ | |\n", "| approx_kl | 0.0069923066 |\n", "| clip_fraction | 0.0428 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.09 |\n", "| explained_variance | 0.383 |\n", "| learning_rate | 0.0003 |\n", "| loss | 131 |\n", "| n_updates | 80 |\n", "| policy_gradient_loss | -0.0029 |\n", "| value_loss | 260 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 792 |\n", "| ep_rew_mean | 56.8 |\n", "| time/ | |\n", "| fps | 1907 |\n", "| iterations | 15 |\n", "| time_elapsed | 128 |\n", "| total_timesteps | 245760 |\n", "| train/ | |\n", "| approx_kl | 0.0046337834 |\n", "| clip_fraction | 0.0305 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.1 |\n", "| explained_variance | 0.613 |\n", "| learning_rate | 0.0003 |\n", "| loss | 49.1 |\n", "| n_updates | 84 |\n", "| policy_gradient_loss | -0.000518 |\n", "| value_loss | 119 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 813 |\n", "| ep_rew_mean | 63.5 |\n", "| time/ | |\n", "| fps | 1848 |\n", "| iterations | 16 |\n", "| time_elapsed | 141 |\n", "| total_timesteps | 262144 |\n", "| train/ | |\n", "| approx_kl | 0.0047494075 |\n", "| clip_fraction | 0.032 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.09 |\n", "| explained_variance | 0.486 |\n", "| learning_rate | 0.0003 |\n", "| loss | 160 |\n", "| n_updates | 88 |\n", "| policy_gradient_loss | -0.000461 |\n", "| value_loss | 242 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 825 |\n", "| ep_rew_mean | 65.1 |\n", "| time/ | |\n", "| fps | 1791 |\n", "| iterations | 17 |\n", "| time_elapsed | 155 |\n", "| 
total_timesteps | 278528 |\n", "| train/ | |\n", "| approx_kl | 0.004002508 |\n", "| clip_fraction | 0.0381 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.07 |\n", "| explained_variance | 0.62 |\n", "| learning_rate | 0.0003 |\n", "| loss | 73.9 |\n", "| n_updates | 92 |\n", "| policy_gradient_loss | -0.000932 |\n", "| value_loss | 181 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 861 |\n", "| ep_rew_mean | 75 |\n", "| time/ | |\n", "| fps | 1743 |\n", "| iterations | 18 |\n", "| time_elapsed | 169 |\n", "| total_timesteps | 294912 |\n", "| train/ | |\n", "| approx_kl | 0.0054403553 |\n", "| clip_fraction | 0.042 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.08 |\n", "| explained_variance | 0.782 |\n", "| learning_rate | 0.0003 |\n", "| loss | 82.3 |\n", "| n_updates | 96 |\n", "| policy_gradient_loss | -0.00199 |\n", "| value_loss | 103 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 839 |\n", "| ep_rew_mean | 73.4 |\n", "| time/ | |\n", "| fps | 1707 |\n", "| iterations | 19 |\n", "| time_elapsed | 182 |\n", "| total_timesteps | 311296 |\n", "| train/ | |\n", "| approx_kl | 0.0046779215 |\n", "| clip_fraction | 0.0418 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.07 |\n", "| explained_variance | 0.881 |\n", "| learning_rate | 0.0003 |\n", "| loss | 11.9 |\n", "| n_updates | 100 |\n", "| policy_gradient_loss | -0.00077 |\n", "| value_loss | 62 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 851 |\n", "| ep_rew_mean | 77.1 |\n", "| time/ | |\n", "| fps | 1682 |\n", "| iterations | 20 |\n", "| time_elapsed | 194 |\n", "| total_timesteps | 327680 |\n", "| train/ | |\n", "| approx_kl | 0.005090245 |\n", "| clip_fraction | 0.0249 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.09 |\n", "| explained_variance | 0.823 |\n", "| learning_rate | 0.0003 |\n", "| loss | 69.4 |\n", "| n_updates | 104 |\n", "| policy_gradient_loss | -0.000835 |\n", "| value_loss | 139 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 866 |\n", "| ep_rew_mean | 84.2 |\n", "| time/ | |\n", "| fps | 1665 |\n", "| iterations | 21 |\n", "| time_elapsed | 206 |\n", "| total_timesteps | 344064 |\n", "| train/ | |\n", "| approx_kl | 0.0040319515 |\n", "| clip_fraction | 0.03 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.07 |\n", "| explained_variance | 0.861 |\n", "| learning_rate | 0.0003 |\n", "| loss | 42.7 |\n", "| n_updates | 108 |\n", "| policy_gradient_loss | -0.00147 |\n", "| value_loss | 121 |\n", "------------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 886 |\n", "| ep_rew_mean | 94.9 |\n", "| time/ | |\n", "| fps | 1650 |\n", "| iterations | 22 |\n", "| time_elapsed | 218 |\n", "| total_timesteps | 360448 |\n", "| train/ | |\n", "| approx_kl | 0.00445921 |\n", "| clip_fraction | 0.0304 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.08 |\n", "| explained_variance | 0.927 |\n", "| learning_rate | 0.0003 |\n", "| loss | 91 |\n", "| n_updates | 112 |\n", "| policy_gradient_loss | -0.00178 |\n", "| value_loss | 48.5 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 878 |\n", 
"| ep_rew_mean | 99.8 |\n", "| time/ | |\n", "| fps | 1637 |\n", "| iterations | 23 |\n", "| time_elapsed | 230 |\n", "| total_timesteps | 376832 |\n", "| train/ | |\n", "| approx_kl | 0.004755725 |\n", "| clip_fraction | 0.0244 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.06 |\n", "| explained_variance | 0.905 |\n", "| learning_rate | 0.0003 |\n", "| loss | 6.82 |\n", "| n_updates | 116 |\n", "| policy_gradient_loss | -0.00121 |\n", "| value_loss | 77.5 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 883 |\n", "| ep_rew_mean | 108 |\n", "| time/ | |\n", "| fps | 1623 |\n", "| iterations | 24 |\n", "| time_elapsed | 242 |\n", "| total_timesteps | 393216 |\n", "| train/ | |\n", "| approx_kl | 0.008141588 |\n", "| clip_fraction | 0.0556 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.06 |\n", "| explained_variance | 0.949 |\n", "| learning_rate | 0.0003 |\n", "| loss | 12.5 |\n", "| n_updates | 120 |\n", "| policy_gradient_loss | -0.000266 |\n", "| value_loss | 49.2 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 867 |\n", "| ep_rew_mean | 112 |\n", "| time/ | |\n", "| fps | 1616 |\n", "| iterations | 25 |\n", "| time_elapsed | 253 |\n", "| total_timesteps | 409600 |\n", "| train/ | |\n", "| approx_kl | 0.006334585 |\n", "| clip_fraction | 0.0376 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.06 |\n", "| explained_variance | 0.898 |\n", "| learning_rate | 0.0003 |\n", "| loss | 32.9 |\n", "| n_updates | 124 |\n", "| policy_gradient_loss | -0.0011 |\n", "| value_loss | 115 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 859 |\n", "| ep_rew_mean | 114 |\n", "| time/ | |\n", "| fps | 1603 |\n", "| iterations | 26 |\n", "| time_elapsed | 265 |\n", "| total_timesteps | 425984 |\n", "| train/ | |\n", "| approx_kl | 0.003915206 |\n", "| clip_fraction | 0.0123 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.05 |\n", "| explained_variance | 0.834 |\n", "| learning_rate | 0.0003 |\n", "| loss | 76.3 |\n", "| n_updates | 128 |\n", "| policy_gradient_loss | 0.000402 |\n", "| value_loss | 235 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 863 |\n", "| ep_rew_mean | 114 |\n", "| time/ | |\n", "| fps | 1592 |\n", "| iterations | 27 |\n", "| time_elapsed | 277 |\n", "| total_timesteps | 442368 |\n", "| train/ | |\n", "| approx_kl | 0.003985384 |\n", "| clip_fraction | 0.031 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.06 |\n", "| explained_variance | 0.945 |\n", "| learning_rate | 0.0003 |\n", "| loss | 21.6 |\n", "| n_updates | 132 |\n", "| policy_gradient_loss | 7.45e-05 |\n", "| value_loss | 56.3 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 859 |\n", "| ep_rew_mean | 113 |\n", "| time/ | |\n", "| fps | 1582 |\n", "| iterations | 28 |\n", "| time_elapsed | 289 |\n", "| total_timesteps | 458752 |\n", "| train/ | |\n", "| approx_kl | 0.0073505742 |\n", "| clip_fraction | 0.0349 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.05 |\n", "| explained_variance | 0.971 |\n", "| learning_rate | 0.0003 |\n", "| loss | 7.98 |\n", "| n_updates | 136 |\n", "| policy_gradient_loss | -0.00145 |\n", "| value_loss | 27.6 |\n", 
"------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 871 |\n", "| ep_rew_mean | 114 |\n", "| time/ | |\n", "| fps | 1570 |\n", "| iterations | 29 |\n", "| time_elapsed | 302 |\n", "| total_timesteps | 475136 |\n", "| train/ | |\n", "| approx_kl | 0.004851341 |\n", "| clip_fraction | 0.0209 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.02 |\n", "| explained_variance | 0.944 |\n", "| learning_rate | 0.0003 |\n", "| loss | 8.32 |\n", "| n_updates | 140 |\n", "| policy_gradient_loss | -0.00105 |\n", "| value_loss | 51.9 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 896 |\n", "| ep_rew_mean | 119 |\n", "| time/ | |\n", "| fps | 1559 |\n", "| iterations | 30 |\n", "| time_elapsed | 315 |\n", "| total_timesteps | 491520 |\n", "| train/ | |\n", "| approx_kl | 0.006854578 |\n", "| clip_fraction | 0.0472 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.04 |\n", "| explained_variance | 0.952 |\n", "| learning_rate | 0.0003 |\n", "| loss | 77.1 |\n", "| n_updates | 144 |\n", "| policy_gradient_loss | -0.000411 |\n", "| value_loss | 53.5 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 948 |\n", "| ep_rew_mean | 125 |\n", "| time/ | |\n", "| fps | 1546 |\n", "| iterations | 31 |\n", "| time_elapsed | 328 |\n", "| total_timesteps | 507904 |\n", "| train/ | |\n", "| approx_kl | 0.007134228 |\n", "| clip_fraction | 0.0647 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.02 |\n", "| explained_variance | 0.983 |\n", "| learning_rate | 0.0003 |\n", "| loss | 4.2 |\n", "| n_updates | 148 |\n", "| policy_gradient_loss | -0.00169 |\n", "| value_loss | 16.3 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 956 |\n", "| ep_rew_mean | 127 |\n", "| time/ | |\n", "| fps | 1535 |\n", "| iterations | 32 |\n", "| time_elapsed | 341 |\n", "| total_timesteps | 524288 |\n", "| train/ | |\n", "| approx_kl | 0.005573826 |\n", "| clip_fraction | 0.0399 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.02 |\n", "| explained_variance | 0.988 |\n", "| learning_rate | 0.0003 |\n", "| loss | 6.44 |\n", "| n_updates | 152 |\n", "| policy_gradient_loss | -0.000776 |\n", "| value_loss | 11.7 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 962 |\n", "| ep_rew_mean | 131 |\n", "| time/ | |\n", "| fps | 1526 |\n", "| iterations | 33 |\n", "| time_elapsed | 354 |\n", "| total_timesteps | 540672 |\n", "| train/ | |\n", "| approx_kl | 0.0048838793 |\n", "| clip_fraction | 0.0352 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.04 |\n", "| explained_variance | 0.988 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.25 |\n", "| n_updates | 156 |\n", "| policy_gradient_loss | -0.000739 |\n", "| value_loss | 10.5 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 977 |\n", "| ep_rew_mean | 134 |\n", "| time/ | |\n", "| fps | 1518 |\n", "| iterations | 34 |\n", "| time_elapsed | 366 |\n", "| total_timesteps | 557056 |\n", "| train/ | |\n", "| approx_kl | 0.004133114 |\n", "| clip_fraction | 0.035 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -1.01 |\n", "| explained_variance | 0.977 |\n", "| learning_rate 
| 0.0003 |\n", "| loss | 9.76 |\n", "| n_updates | 160 |\n", "| policy_gradient_loss | -0.000917 |\n", "| value_loss | 27 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 993 |\n", "| ep_rew_mean | 137 |\n", "| time/ | |\n", "| fps | 1513 |\n", "| iterations | 35 |\n", "| time_elapsed | 378 |\n", "| total_timesteps | 573440 |\n", "| train/ | |\n", "| approx_kl | 0.004068788 |\n", "| clip_fraction | 0.0311 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.988 |\n", "| explained_variance | 0.99 |\n", "| learning_rate | 0.0003 |\n", "| loss | 5.39 |\n", "| n_updates | 164 |\n", "| policy_gradient_loss | 0.000237 |\n", "| value_loss | 11.9 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 970 |\n", "| ep_rew_mean | 135 |\n", "| time/ | |\n", "| fps | 1508 |\n", "| iterations | 36 |\n", "| time_elapsed | 390 |\n", "| total_timesteps | 589824 |\n", "| train/ | |\n", "| approx_kl | 0.005884242 |\n", "| clip_fraction | 0.0563 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.987 |\n", "| explained_variance | 0.991 |\n", "| learning_rate | 0.0003 |\n", "| loss | 4.13 |\n", "| n_updates | 168 |\n", "| policy_gradient_loss | -0.0016 |\n", "| value_loss | 10.2 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 963 |\n", "| ep_rew_mean | 136 |\n", "| time/ | |\n", "| fps | 1507 |\n", "| iterations | 37 |\n", "| time_elapsed | 402 |\n", "| total_timesteps | 606208 |\n", "| train/ | |\n", "| approx_kl | 0.004795673 |\n", "| clip_fraction | 0.0281 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.976 |\n", "| explained_variance | 0.969 |\n", "| learning_rate | 0.0003 |\n", "| loss | 24.1 |\n", "| n_updates | 172 |\n", "| policy_gradient_loss | -0.000673 |\n", "| value_loss | 43.3 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 962 |\n", "| ep_rew_mean | 137 |\n", "| time/ | |\n", "| fps | 1500 |\n", "| iterations | 38 |\n", "| time_elapsed | 414 |\n", "| total_timesteps | 622592 |\n", "| train/ | |\n", "| approx_kl | 0.003379224 |\n", "| clip_fraction | 0.051 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.979 |\n", "| explained_variance | 0.981 |\n", "| learning_rate | 0.0003 |\n", "| loss | 9.05 |\n", "| n_updates | 176 |\n", "| policy_gradient_loss | -1.03e-05 |\n", "| value_loss | 17.4 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 963 |\n", "| ep_rew_mean | 135 |\n", "| time/ | |\n", "| fps | 1494 |\n", "| iterations | 39 |\n", "| time_elapsed | 427 |\n", "| total_timesteps | 638976 |\n", "| train/ | |\n", "| approx_kl | 0.005858536 |\n", "| clip_fraction | 0.0534 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.963 |\n", "| explained_variance | 0.975 |\n", "| learning_rate | 0.0003 |\n", "| loss | 5.65 |\n", "| n_updates | 180 |\n", "| policy_gradient_loss | -0.000952 |\n", "| value_loss | 28.8 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 956 |\n", "| ep_rew_mean | 134 |\n", "| time/ | |\n", "| fps | 1484 |\n", "| iterations | 40 |\n", "| time_elapsed | 441 |\n", "| total_timesteps | 655360 |\n", "| train/ | |\n", "| approx_kl | 0.0066601937 |\n", "| 
clip_fraction | 0.0405 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.945 |\n", "| explained_variance | 0.982 |\n", "| learning_rate | 0.0003 |\n", "| loss | 5.69 |\n", "| n_updates | 184 |\n", "| policy_gradient_loss | -4.42e-05 |\n", "| value_loss | 22.7 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 957 |\n", "| ep_rew_mean | 134 |\n", "| time/ | |\n", "| fps | 1479 |\n", "| iterations | 41 |\n", "| time_elapsed | 454 |\n", "| total_timesteps | 671744 |\n", "| train/ | |\n", "| approx_kl | 0.0055100834 |\n", "| clip_fraction | 0.0513 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.941 |\n", "| explained_variance | 0.981 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.98 |\n", "| n_updates | 188 |\n", "| policy_gradient_loss | -2.25e-05 |\n", "| value_loss | 24.4 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 969 |\n", "| ep_rew_mean | 136 |\n", "| time/ | |\n", "| fps | 1473 |\n", "| iterations | 42 |\n", "| time_elapsed | 467 |\n", "| total_timesteps | 688128 |\n", "| train/ | |\n", "| approx_kl | 0.005155311 |\n", "| clip_fraction | 0.0426 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.943 |\n", "| explained_variance | 0.98 |\n", "| learning_rate | 0.0003 |\n", "| loss | 13.9 |\n", "| n_updates | 192 |\n", "| policy_gradient_loss | -0.00124 |\n", "| value_loss | 25 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 975 |\n", "| ep_rew_mean | 138 |\n", "| time/ | |\n", "| fps | 1469 |\n", "| iterations | 43 |\n", "| time_elapsed | 479 |\n", "| total_timesteps | 704512 |\n", "| train/ | |\n", "| approx_kl | 0.0052159606 |\n", "| clip_fraction | 0.0508 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.914 |\n", "| explained_variance | 0.982 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.45 |\n", "| n_updates | 196 |\n", "| policy_gradient_loss | -0.000953 |\n", "| value_loss | 24.5 |\n", "------------------------------------------\n", "----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 962 |\n", "| ep_rew_mean | 137 |\n", "| time/ | |\n", "| fps | 1469 |\n", "| iterations | 44 |\n", "| time_elapsed | 490 |\n", "| total_timesteps | 720896 |\n", "| train/ | |\n", "| approx_kl | 0.00491371 |\n", "| clip_fraction | 0.0475 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.869 |\n", "| explained_variance | 0.995 |\n", "| learning_rate | 0.0003 |\n", "| loss | 3.02 |\n", "| n_updates | 200 |\n", "| policy_gradient_loss | -0.000853 |\n", "| value_loss | 5.6 |\n", "----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 961 |\n", "| ep_rew_mean | 139 |\n", "| time/ | |\n", "| fps | 1466 |\n", "| iterations | 45 |\n", "| time_elapsed | 502 |\n", "| total_timesteps | 737280 |\n", "| train/ | |\n", "| approx_kl | 0.004709458 |\n", "| clip_fraction | 0.0262 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.852 |\n", "| explained_variance | 0.97 |\n", "| learning_rate | 0.0003 |\n", "| loss | 80.6 |\n", "| n_updates | 204 |\n", "| policy_gradient_loss | -0.000865 |\n", "| value_loss | 31.4 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 952 |\n", "| ep_rew_mean | 143 |\n", "| time/ | |\n", "| fps | 1470 |\n", "| 
iterations | 46 |\n", "| time_elapsed | 512 |\n", "| total_timesteps | 753664 |\n", "| train/ | |\n", "| approx_kl | 0.0029388377 |\n", "| clip_fraction | 0.0486 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.852 |\n", "| explained_variance | 0.985 |\n", "| learning_rate | 0.0003 |\n", "| loss | 2.2 |\n", "| n_updates | 208 |\n", "| policy_gradient_loss | -0.0003 |\n", "| value_loss | 15.8 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 942 |\n", "| ep_rew_mean | 154 |\n", "| time/ | |\n", "| fps | 1469 |\n", "| iterations | 47 |\n", "| time_elapsed | 523 |\n", "| total_timesteps | 770048 |\n", "| train/ | |\n", "| approx_kl | 0.0039927866 |\n", "| clip_fraction | 0.0432 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.793 |\n", "| explained_variance | 0.938 |\n", "| learning_rate | 0.0003 |\n", "| loss | 51 |\n", "| n_updates | 212 |\n", "| policy_gradient_loss | -0.00173 |\n", "| value_loss | 97 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 920 |\n", "| ep_rew_mean | 157 |\n", "| time/ | |\n", "| fps | 1470 |\n", "| iterations | 48 |\n", "| time_elapsed | 534 |\n", "| total_timesteps | 786432 |\n", "| train/ | |\n", "| approx_kl | 0.005898821 |\n", "| clip_fraction | 0.0726 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.771 |\n", "| explained_variance | 0.926 |\n", "| learning_rate | 0.0003 |\n", "| loss | 87.1 |\n", "| n_updates | 216 |\n", "| policy_gradient_loss | -0.00287 |\n", "| value_loss | 96.7 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 870 |\n", "| ep_rew_mean | 178 |\n", "| time/ | |\n", "| fps | 1472 |\n", "| iterations | 49 |\n", "| time_elapsed | 545 |\n", "| total_timesteps | 802816 |\n", "| train/ | |\n", "| approx_kl | 0.0052704774 |\n", "| clip_fraction | 0.0525 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.761 |\n", "| explained_variance | 0.93 |\n", "| learning_rate | 0.0003 |\n", "| loss | 10.1 |\n", "| n_updates | 220 |\n", "| policy_gradient_loss | -0.000997 |\n", "| value_loss | 102 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 725 |\n", "| ep_rew_mean | 210 |\n", "| time/ | |\n", "| fps | 1479 |\n", "| iterations | 50 |\n", "| time_elapsed | 553 |\n", "| total_timesteps | 819200 |\n", "| train/ | |\n", "| approx_kl | 0.0073069884 |\n", "| clip_fraction | 0.0854 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.733 |\n", "| explained_variance | 0.86 |\n", "| learning_rate | 0.0003 |\n", "| loss | 79.3 |\n", "| n_updates | 224 |\n", "| policy_gradient_loss | -0.00339 |\n", "| value_loss | 220 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 550 |\n", "| ep_rew_mean | 237 |\n", "| time/ | |\n", "| fps | 1489 |\n", "| iterations | 51 |\n", "| time_elapsed | 560 |\n", "| total_timesteps | 835584 |\n", "| train/ | |\n", "| approx_kl | 0.008809601 |\n", "| clip_fraction | 0.0902 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.747 |\n", "| explained_variance | 0.763 |\n", "| learning_rate | 0.0003 |\n", "| loss | 160 |\n", "| n_updates | 228 |\n", "| policy_gradient_loss | -0.00364 |\n", "| value_loss | 327 |\n", "-----------------------------------------\n", 
"-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 424 |\n", "| ep_rew_mean | 249 |\n", "| time/ | |\n", "| fps | 1501 |\n", "| iterations | 52 |\n", "| time_elapsed | 567 |\n", "| total_timesteps | 851968 |\n", "| train/ | |\n", "| approx_kl | 0.007010665 |\n", "| clip_fraction | 0.0567 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.771 |\n", "| explained_variance | 0.74 |\n", "| learning_rate | 0.0003 |\n", "| loss | 99.1 |\n", "| n_updates | 232 |\n", "| policy_gradient_loss | -0.0019 |\n", "| value_loss | 250 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 400 |\n", "| ep_rew_mean | 247 |\n", "| time/ | |\n", "| fps | 1514 |\n", "| iterations | 53 |\n", "| time_elapsed | 573 |\n", "| total_timesteps | 868352 |\n", "| train/ | |\n", "| approx_kl | 0.0056736926 |\n", "| clip_fraction | 0.0466 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.799 |\n", "| explained_variance | 0.88 |\n", "| learning_rate | 0.0003 |\n", "| loss | 49.7 |\n", "| n_updates | 236 |\n", "| policy_gradient_loss | -0.00104 |\n", "| value_loss | 148 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 378 |\n", "| ep_rew_mean | 243 |\n", "| time/ | |\n", "| fps | 1523 |\n", "| iterations | 54 |\n", "| time_elapsed | 580 |\n", "| total_timesteps | 884736 |\n", "| train/ | |\n", "| approx_kl | 0.0038143774 |\n", "| clip_fraction | 0.0327 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.817 |\n", "| explained_variance | 0.79 |\n", "| learning_rate | 0.0003 |\n", "| loss | 30.4 |\n", "| n_updates | 240 |\n", "| policy_gradient_loss | -0.000752 |\n", "| value_loss | 120 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 376 |\n", "| ep_rew_mean | 244 |\n", "| time/ | |\n", "| fps | 1534 |\n", "| iterations | 55 |\n", "| time_elapsed | 587 |\n", "| total_timesteps | 901120 |\n", "| train/ | |\n", "| approx_kl | 0.0034903912 |\n", "| clip_fraction | 0.0457 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.833 |\n", "| explained_variance | 0.808 |\n", "| learning_rate | 0.0003 |\n", "| loss | 22.9 |\n", "| n_updates | 244 |\n", "| policy_gradient_loss | -0.000512 |\n", "| value_loss | 156 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 377 |\n", "| ep_rew_mean | 249 |\n", "| time/ | |\n", "| fps | 1544 |\n", "| iterations | 56 |\n", "| time_elapsed | 593 |\n", "| total_timesteps | 917504 |\n", "| train/ | |\n", "| approx_kl | 0.004484849 |\n", "| clip_fraction | 0.0322 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.849 |\n", "| explained_variance | 0.923 |\n", "| learning_rate | 0.0003 |\n", "| loss | 22.8 |\n", "| n_updates | 248 |\n", "| policy_gradient_loss | -0.000693 |\n", "| value_loss | 53.7 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 357 |\n", "| ep_rew_mean | 257 |\n", "| time/ | |\n", "| fps | 1556 |\n", "| iterations | 57 |\n", "| time_elapsed | 600 |\n", "| total_timesteps | 933888 |\n", "| train/ | |\n", "| approx_kl | 0.007613359 |\n", "| clip_fraction | 0.0467 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.833 |\n", "| explained_variance | 0.942 |\n", "| learning_rate | 0.0003 |\n", "| loss | 12.2 |\n", "| 
n_updates | 252 |\n", "| policy_gradient_loss | -0.0017 |\n", "| value_loss | 39.3 |\n", "-----------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 341 |\n", "| ep_rew_mean | 247 |\n", "| time/ | |\n", "| fps | 1565 |\n", "| iterations | 58 |\n", "| time_elapsed | 606 |\n", "| total_timesteps | 950272 |\n", "| train/ | |\n", "| approx_kl | 0.0042011878 |\n", "| clip_fraction | 0.0338 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.822 |\n", "| explained_variance | 0.906 |\n", "| learning_rate | 0.0003 |\n", "| loss | 83.8 |\n", "| n_updates | 256 |\n", "| policy_gradient_loss | -0.000606 |\n", "| value_loss | 88 |\n", "------------------------------------------\n", "------------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 355 |\n", "| ep_rew_mean | 245 |\n", "| time/ | |\n", "| fps | 1576 |\n", "| iterations | 59 |\n", "| time_elapsed | 613 |\n", "| total_timesteps | 966656 |\n", "| train/ | |\n", "| approx_kl | 0.0044656964 |\n", "| clip_fraction | 0.0419 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.816 |\n", "| explained_variance | 0.833 |\n", "| learning_rate | 0.0003 |\n", "| loss | 18.2 |\n", "| n_updates | 260 |\n", "| policy_gradient_loss | -0.00155 |\n", "| value_loss | 226 |\n", "------------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 353 |\n", "| ep_rew_mean | 243 |\n", "| time/ | |\n", "| fps | 1588 |\n", "| iterations | 60 |\n", "| time_elapsed | 619 |\n", "| total_timesteps | 983040 |\n", "| train/ | |\n", "| approx_kl | 0.004190245 |\n", "| clip_fraction | 0.0299 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.806 |\n", "| explained_variance | 0.926 |\n", "| learning_rate | 0.0003 |\n", "| loss | 37 |\n", "| n_updates | 264 |\n", "| policy_gradient_loss | 0.00069 |\n", "| value_loss | 91 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 342 |\n", "| ep_rew_mean | 250 |\n", "| time/ | |\n", "| fps | 1598 |\n", "| iterations | 61 |\n", "| time_elapsed | 625 |\n", "| total_timesteps | 999424 |\n", "| train/ | |\n", "| approx_kl | 0.002153641 |\n", "| clip_fraction | 0.0103 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.795 |\n", "| explained_variance | 0.84 |\n", "| learning_rate | 0.0003 |\n", "| loss | 42.7 |\n", "| n_updates | 268 |\n", "| policy_gradient_loss | 0.000193 |\n", "| value_loss | 335 |\n", "-----------------------------------------\n", "-----------------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 345 |\n", "| ep_rew_mean | 252 |\n", "| time/ | |\n", "| fps | 1610 |\n", "| iterations | 62 |\n", "| time_elapsed | 630 |\n", "| total_timesteps | 1015808 |\n", "| train/ | |\n", "| approx_kl | 0.003585052 |\n", "| clip_fraction | 0.0432 |\n", "| clip_range | 0.2 |\n", "| entropy_loss | -0.758 |\n", "| explained_variance | 0.976 |\n", "| learning_rate | 0.0003 |\n", "| loss | 11.3 |\n", "| n_updates | 272 |\n", "| policy_gradient_loss | -0.000312 |\n", "| value_loss | 32.5 |\n", "-----------------------------------------\n" ] } ], "source": [ "# SOLUTION\n", "# Train it for 1,000,000 timesteps\n", "model.learn(total_timesteps=1000000)\n", "# Save the model\n", "model_name = \"ppo-LunarLander-v2\"\n", "model.save(model_name)" ] }, { "cell_type": "code", "execution_count": 19, "id": "2811ffeb", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": 
"stderr", "output_type": "stream", "text": [ "/Users/xnpeng/miniforge3/envs/colab/lib/python3.8/site-packages/stable_baselines3/common/evaluation.py:67: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "mean_reward=256.80 +/- 19.81\n" ] } ], "source": [ "eval_env = gym.make(\"LunarLander-v2\")\n", "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)\n", "print(f\"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}\")" ] }, { "cell_type": "code", "execution_count": 18, "id": "665b7073", "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Token is valid.\n", "Your token has been saved in your configured git credential helpers (osxkeychain,store).\n", "Your token has been saved to /Users/xnpeng/.cache/huggingface/token\n", "Login successful\n" ] } ], "source": [ "notebook_login()\n", "!git config --global credential.helper store" ] }, { "cell_type": "code", "execution_count": 20, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001B[38;5;4mℹ This function will save, evaluate, generate a video of your agent,\n", "create a model card and push everything to the hub. It might take up to 1min.\n", "This is a work in progress: if you encounter a bug, please open an issue.\u001B[0m\n" ] }, { "ename": "HfHubHTTPError", "evalue": "403 Client Error: Forbidden for url: https://huggingface.co/api/repos/create (Request ID: Root=1-63fffd3f-574ac7e96ca0c1466d010d33)\n\nYou don't have the rights to create a model under this namespace", "output_type": "error", "traceback": [ "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", "\u001B[0;31mHTTPError\u001B[0m Traceback (most recent call last)", "File \u001B[0;32m~/miniforge3/envs/colab/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py:264\u001B[0m, in \u001B[0;36mhf_raise_for_status\u001B[0;34m(response, endpoint_name)\u001B[0m\n\u001B[1;32m 263\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m--> 264\u001B[0m \u001B[43mresponse\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mraise_for_status\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 265\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m HTTPError \u001B[38;5;28;01mas\u001B[39;00m e:\n", "File \u001B[0;32m~/miniforge3/envs/colab/lib/python3.8/site-packages/requests/models.py:1021\u001B[0m, in \u001B[0;36mResponse.raise_for_status\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 1020\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m http_error_msg:\n\u001B[0;32m-> 1021\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m HTTPError(http_error_msg, response\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m)\n", "\u001B[0;31mHTTPError\u001B[0m: 403 Client Error: Forbidden for url: https://huggingface.co/api/repos/create", "\nThe above exception was the direct cause of the following exception:\n", "\u001B[0;31mHfHubHTTPError\u001B[0m Traceback (most recent call last)", "Cell \u001B[0;32mIn[20], line 28\u001B[0m\n\u001B[1;32m 25\u001B[0m eval_env \u001B[38;5;241m=\u001B[39m DummyVecEnv([\u001B[38;5;28;01mlambda\u001B[39;00m: gym\u001B[38;5;241m.\u001B[39mmake(env_id)])\n\u001B[1;32m 27\u001B[0m \u001B[38;5;66;03m# PLACE the 
package_to_hub function you've just filled here\u001B[39;00m\n\u001B[0;32m---> 28\u001B[0m \u001B[43mpackage_to_hub\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmodel\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mmodel\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;66;43;03m# Our trained model\u001B[39;49;00m\n\u001B[1;32m 29\u001B[0m \u001B[43m \u001B[49m\u001B[43mmodel_name\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mmodel_name\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;66;43;03m# The name of our trained model\u001B[39;49;00m\n\u001B[1;32m 30\u001B[0m \u001B[43m \u001B[49m\u001B[43mmodel_architecture\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mmodel_architecture\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;66;43;03m# The model architecture we used: in our case PPO\u001B[39;49;00m\n\u001B[1;32m 31\u001B[0m \u001B[43m \u001B[49m\u001B[43menv_id\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43menv_id\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;66;43;03m# Name of the environment\u001B[39;49;00m\n\u001B[1;32m 32\u001B[0m \u001B[43m \u001B[49m\u001B[43meval_env\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43meval_env\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;66;43;03m# Evaluation Environment\u001B[39;49;00m\n\u001B[1;32m 33\u001B[0m \u001B[43m \u001B[49m\u001B[43mrepo_id\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mrepo_id\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;66;43;03m# id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\u001B[39;49;00m\n\u001B[1;32m 34\u001B[0m \u001B[43m \u001B[49m\u001B[43mcommit_message\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcommit_message\u001B[49m\u001B[43m)\u001B[49m\n", "File \u001B[0;32m~/miniforge3/envs/colab/lib/python3.8/site-packages/huggingface_sb3/push_to_hub.py:333\u001B[0m, in \u001B[0;36mpackage_to_hub\u001B[0;34m(model, model_name, model_architecture, env_id, eval_env, repo_id, commit_message, is_deterministic, n_eval_episodes, token, video_length, logs)\u001B[0m\n\u001B[1;32m 324\u001B[0m eval_env \u001B[38;5;241m=\u001B[39m DummyVecEnv([\u001B[38;5;28;01mlambda\u001B[39;00m: eval_env])\n\u001B[1;32m 326\u001B[0m msg\u001B[38;5;241m.\u001B[39minfo(\n\u001B[1;32m 327\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mThis function will save, evaluate, generate a video of your agent, \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 328\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcreate a model card and push everything to the hub. \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 329\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mIt might take up to 1min. 
\u001B[39m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 330\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mThis is a work in progress: if you encounter a bug, please open an issue.\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 331\u001B[0m )\n\u001B[0;32m--> 333\u001B[0m repo_url \u001B[38;5;241m=\u001B[39m \u001B[43mHfApi\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcreate_repo\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 334\u001B[0m \u001B[43m \u001B[49m\u001B[43mrepo_id\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mrepo_id\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 335\u001B[0m \u001B[43m \u001B[49m\u001B[43mtoken\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mtoken\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 336\u001B[0m \u001B[43m \u001B[49m\u001B[43mprivate\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mFalse\u001B[39;49;00m\u001B[43m,\u001B[49m\n\u001B[1;32m 337\u001B[0m \u001B[43m \u001B[49m\u001B[43mexist_ok\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mTrue\u001B[39;49;00m\u001B[43m,\u001B[49m\n\u001B[1;32m 338\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 340\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m tempfile\u001B[38;5;241m.\u001B[39mTemporaryDirectory() \u001B[38;5;28;01mas\u001B[39;00m tmpdirname:\n\u001B[1;32m 341\u001B[0m tmpdirname \u001B[38;5;241m=\u001B[39m Path(tmpdirname)\n", "File \u001B[0;32m~/miniforge3/envs/colab/lib/python3.8/site-packages/huggingface_hub/utils/_validators.py:124\u001B[0m, in \u001B[0;36mvalidate_hf_hub_args.._inner_fn\u001B[0;34m(*args, **kwargs)\u001B[0m\n\u001B[1;32m 119\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m check_use_auth_token:\n\u001B[1;32m 120\u001B[0m kwargs \u001B[38;5;241m=\u001B[39m smoothly_deprecate_use_auth_token(\n\u001B[1;32m 121\u001B[0m fn_name\u001B[38;5;241m=\u001B[39mfn\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m, has_token\u001B[38;5;241m=\u001B[39mhas_token, kwargs\u001B[38;5;241m=\u001B[39mkwargs\n\u001B[1;32m 122\u001B[0m )\n\u001B[0;32m--> 124\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mfn\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n", "File \u001B[0;32m~/miniforge3/envs/colab/lib/python3.8/site-packages/huggingface_hub/hf_api.py:2010\u001B[0m, in \u001B[0;36mHfApi.create_repo\u001B[0;34m(self, repo_id, token, private, repo_type, exist_ok, space_sdk, space_hardware)\u001B[0m\n\u001B[1;32m 2007\u001B[0m r \u001B[38;5;241m=\u001B[39m requests\u001B[38;5;241m.\u001B[39mpost(path, headers\u001B[38;5;241m=\u001B[39mheaders, json\u001B[38;5;241m=\u001B[39mjson)\n\u001B[1;32m 2009\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m-> 2010\u001B[0m \u001B[43mhf_raise_for_status\u001B[49m\u001B[43m(\u001B[49m\u001B[43mr\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 2011\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m HTTPError \u001B[38;5;28;01mas\u001B[39;00m err:\n\u001B[1;32m 2012\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m exist_ok \u001B[38;5;129;01mand\u001B[39;00m err\u001B[38;5;241m.\u001B[39mresponse\u001B[38;5;241m.\u001B[39mstatus_code \u001B[38;5;241m==\u001B[39m \u001B[38;5;241m409\u001B[39m:\n\u001B[1;32m 2013\u001B[0m \u001B[38;5;66;03m# Repo already exists and `exist_ok=True`\u001B[39;00m\n", 
"File \u001B[0;32m~/miniforge3/envs/colab/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py:318\u001B[0m, in \u001B[0;36mhf_raise_for_status\u001B[0;34m(response, endpoint_name)\u001B[0m\n\u001B[1;32m 314\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m BadRequestError(message, response\u001B[38;5;241m=\u001B[39mresponse) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01me\u001B[39;00m\n\u001B[1;32m 316\u001B[0m \u001B[38;5;66;03m# Convert `HTTPError` into a `HfHubHTTPError` to display request information\u001B[39;00m\n\u001B[1;32m 317\u001B[0m \u001B[38;5;66;03m# as well (request id and/or server error message)\u001B[39;00m\n\u001B[0;32m--> 318\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m HfHubHTTPError(\u001B[38;5;28mstr\u001B[39m(e), response\u001B[38;5;241m=\u001B[39mresponse) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01me\u001B[39;00m\n", "\u001B[0;31mHfHubHTTPError\u001B[0m: 403 Client Error: Forbidden for url: https://huggingface.co/api/repos/create (Request ID: Root=1-63fffd3f-574ac7e96ca0c1466d010d33)\n\nYou don't have the rights to create a model under this namespace" ] } ], "source": [ "import gym\n", "\n", "from stable_baselines3 import PPO\n", "from stable_baselines3.common.vec_env import DummyVecEnv\n", "from stable_baselines3.common.env_util import make_vec_env\n", "\n", "from huggingface_sb3 import package_to_hub\n", "\n", "# PLACE the variables you've just defined two cells above\n", "# Define the name of the environment\n", "env_id = \"LunarLander-v2\"\n", "\n", "# TODO: Define the model architecture we used\n", "model_architecture = \"PPO\"\n", "\n", "## Define a repo_id\n", "## repo_id is the id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n", "## CHANGE WITH YOUR REPO ID\n", "repo_id = \"Xnpeng/ppo-LunarLander-v2\" # Change with your repo id, you can't push with mine 😄\n", "\n", "## Define the commit message\n", "commit_message = \"Upload PPO LunarLander-v2 trained agent\"\n", "\n", "# Create the evaluation env\n", "eval_env = DummyVecEnv([lambda: gym.make(env_id)])\n", "\n", "# PLACE the package_to_hub function you've just filled here\n", "package_to_hub(model=model, # Our trained model\n", " model_name=model_name, # The name of our trained model\n", " model_architecture=model_architecture, # The model architecture we used: in our case PPO\n", " env_id=env_id, # Name of the environment\n", " eval_env=eval_env, # Evaluation Environment\n", " repo_id=repo_id, # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n", " commit_message=commit_message)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.16" } }, "nbformat": 4, "nbformat_minor": 5 }