jostyposty committed on
Commit
21c3779
·
1 Parent(s): f88882e

docs: add usage info

Browse files
Files changed (2) hide show
  1. README.md +19 -2
  2. load.py +20 -0
README.md CHANGED
@@ -72,9 +72,26 @@ Perhaps we should average over more environments? Wouldn't this give a result le
72
  ## Usage (with Stable-baselines3)
73
 
74
  ```python
 
75
  from huggingface_sb3 import load_from_hub
76
- checkpoint = load_from_hub("jostyposty/drl-course-unit-01-lunar-lander-v2", "ppo-LunarLander-v2_010_000_000_hf_defaults.zip")
77
- # TODO: test this
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  ```
79
 
80
 
 
72
  ## Usage (with Stable-baselines3)
73
 
74
  ```python
75
+ import gymnasium as gym
76
  from huggingface_sb3 import load_from_hub
77
+ from stable_baselines3 import PPO
78
+ from stable_baselines3.common.evaluation import evaluate_policy
79
+ from stable_baselines3.common.monitor import Monitor
80
+
81
+ env_id = "LunarLander-v2"
82
+
83
+ model_fp = load_from_hub(
84
+ "jostyposty/drl-course-unit-01-lunar-lander-v2",
85
+ "ppo-LunarLander-v2_010_000_000_hf_defaults.zip",
86
+ )
87
+
88
+ model = PPO.load(model_fp, print_system_info=True)
89
+ eval_env = Monitor(gym.make(env_id))
90
+ mean_reward, std_reward = evaluate_policy(
91
+ model, eval_env, n_eval_episodes=10, deterministic=True
92
+ )
93
+ print(f"results: {mean_reward - std_reward:.2f}")
94
+ print(f"mean_reward: {mean_reward:.2f} +/- {std_reward}")
95
  ```
96
 
97
 
load.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gymnasium as gym
2
+ from huggingface_sb3 import load_from_hub
3
+ from stable_baselines3 import PPO
4
+ from stable_baselines3.common.evaluation import evaluate_policy
5
+ from stable_baselines3.common.monitor import Monitor
6
+
7
+ env_id = "LunarLander-v2"
8
+
9
+ model_fp = load_from_hub(
10
+ "jostyposty/drl-course-unit-01-lunar-lander-v2",
11
+ "ppo-LunarLander-v2_010_000_000_hf_defaults.zip",
12
+ )
13
+
14
+ model = PPO.load(model_fp, print_system_info=True)
15
+ eval_env = Monitor(gym.make(env_id))
16
+ mean_reward, std_reward = evaluate_policy(
17
+ model, eval_env, n_eval_episodes=10, deterministic=True
18
+ )
19
+ print(f"results: {mean_reward - std_reward:.2f}")
20
+ print(f"mean_reward: {mean_reward:.2f} +/- {std_reward}")