jostyposty
/

drl-course-unit-01-lunar-lander-v2

Reinforcement Learning

stable-baselines3

deep-reinforcement-learning

Model card Files Files and versions Community

jostyposty committed on Mar 26, 2024

Commit

21c3779

·

1 Parent(s): f88882e

docs: add usage info

Files changed (2) hide show

README.md +19 -2
load.py +20 -0

README.md CHANGED Viewed

@@ -72,9 +72,26 @@ Perhaps we should average over more environments? Wouldn't this give a result le
 ## Usage (with Stable-baselines3)
 ```python
 from huggingface_sb3 import load_from_hub
-checkpoint = load_from_hub("jostyposty/drl-course-unit-01-lunar-lander-v2", "ppo-LunarLander-v2_010_000_000_hf_defaults.zip")
-# TODO: test this
 ```

 ## Usage (with Stable-baselines3)
 ```python
+import gymnasium as gym
 from huggingface_sb3 import load_from_hub
+from stable_baselines3 import PPO
+from stable_baselines3.common.evaluation import evaluate_policy
+from stable_baselines3.common.monitor import Monitor
+env_id = "LunarLander-v2"
+model_fp = load_from_hub(
+    "jostyposty/drl-course-unit-01-lunar-lander-v2",
+    "ppo-LunarLander-v2_010_000_000_hf_defaults.zip",
+)
+model = PPO.load(model_fp, print_system_info=True)
+eval_env = Monitor(gym.make(env_id))
+mean_reward, std_reward = evaluate_policy(
+    model, eval_env, n_eval_episodes=10, deterministic=True
+)
+print(f"results: {mean_reward - std_reward:.2f}")
+print(f"mean_reward: {mean_reward:.2f} +/- {std_reward}")
 ```

load.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import gymnasium as gym
+from huggingface_sb3 import load_from_hub
+from stable_baselines3 import PPO
+from stable_baselines3.common.evaluation import evaluate_policy
+from stable_baselines3.common.monitor import Monitor
+env_id = "LunarLander-v2"
+model_fp = load_from_hub(
+    "jostyposty/drl-course-unit-01-lunar-lander-v2",
+    "ppo-LunarLander-v2_010_000_000_hf_defaults.zip",
+)
+model = PPO.load(model_fp, print_system_info=True)
+eval_env = Monitor(gym.make(env_id))
+mean_reward, std_reward = evaluate_policy(
+    model, eval_env, n_eval_episodes=10, deterministic=True
+)
+print(f"results: {mean_reward - std_reward:.2f}")
+print(f"mean_reward: {mean_reward:.2f} +/- {std_reward}")