zjowowen committed
Commit dc3b74f
1 Parent(s): 3836aeb

Upload README.md with huggingface_hub

Files changed (1):
  1. README.md +110 -202

README.md CHANGED
@@ -21,7 +21,7 @@ model-index:
        type: CartPole-v0
      metrics:
      - type: mean_reward
-       value: 195.5 +/- 4.18
+       value: 200.0 +/- 0.0
        name: mean_reward
  ---
 
@@ -142,7 +142,7 @@ push_model_to_hub(
      usage_file_by_huggingface_ding="./muzero/cartpole_muzero_download.py",
      train_file="./muzero/cartpole_muzero.py",
      repo_id="OpenDILabCommunity/CartPole-v0-MuZero",
-     create_repo=True
+     create_repo=False
  )
 
  ```
@@ -155,209 +155,117 @@ push_model_to_hub(
 
  ```python
  exp_config = {
-     'env': {
-         'manager': {
-             'episode_num': float("inf"),
-             'max_retry': 5,
-             'step_timeout': None,
-             'auto_reset': True,
-             'reset_timeout': None,
-             'retry_type': 'reset',
-             'retry_waiting_time': 0.1,
-             'shared_memory': False,
-             'copy_on_get': True,
-             'context': 'fork',
-             'wait_num': float("inf"),
-             'step_wait_timeout': None,
-             'connect_timeout': 60,
-             'reset_inplace': False,
-             'cfg_type': 'SyncSubprocessEnvManagerDict',
-             'type': 'subprocess'
-         },
-         'stop_value':
-         10000000000,
-         'n_evaluator_episode':
-         3,
-         'type':
-         'cartpole_lightzero',
-         'import_names':
-         ['zoo.classic_control.cartpole.envs.cartpole_lightzero_env'],
-         'env_id':
-         'CartPole-v0',
-         'continuous':
-         False,
-         'manually_discretization':
-         False,
-         'replay_path':
-         '/tmp/tmp4kdr3rf1/videos'
-     },
-     'policy': {
-         'model': {
-             'model_type': 'mlp',
-             'continuous_action_space': False,
-             'observation_shape': 4,
-             'self_supervised_learning_loss': True,
-             'categorical_distribution': True,
-             'image_channel': 1,
-             'frame_stack_num': 1,
-             'num_res_blocks': 1,
-             'num_channels': 64,
-             'support_scale': 300,
-             'bias': True,
-             'discrete_action_encoding_type': 'one_hot',
-             'res_connection_in_dynamics': True,
-             'norm_type': 'BN',
-             'action_space_size': 2,
-             'lstm_hidden_size': 128,
-             'latent_state_dim': 128
-         },
-         'learn': {
-             'learner': {
-                 'train_iterations': 1000000000,
-                 'dataloader': {
-                     'num_workers': 0
-                 },
-                 'log_policy': True,
-                 'hook': {
-                     'load_ckpt_before_run': '',
-                     'log_show_after_iter': 100,
-                     'save_ckpt_after_iter': 10000,
-                     'save_ckpt_after_run': True
-                 },
-                 'cfg_type': 'BaseLearnerDict'
-             }
-         },
-         'collect': {
-             'collector': {
-                 'deepcopy_obs': False,
-                 'transform_obs': False,
-                 'collect_print_freq': 100,
-                 'cfg_type': 'SampleSerialCollectorDict',
-                 'type': 'sample'
-             }
-         },
-         'eval': {
-             'evaluator': {
-                 'eval_freq': 1000,
-                 'render': {
-                     'render_freq': -1,
-                     'mode': 'train_iter'
-                 },
-                 'figure_path': None,
-                 'cfg_type': 'InteractionSerialEvaluatorDict',
-                 'stop_value': 10000000000,
-                 'n_episode': 3
+     'main_config': {
+         'exp_name': 'CartPole-v0-MuZero',
+         'seed': 0,
+         'env': {
+             'env_id': 'CartPole-v0',
+             'continuous': False,
+             'manually_discretization': False,
+             'collector_env_num': 8,
+             'evaluator_env_num': 3,
+             'n_evaluator_episode': 3,
+             'manager': {
+                 'shared_memory': False
              }
          },
-         'other': {
-             'replay_buffer': {
-                 'type': 'advanced',
-                 'replay_buffer_size': 4096,
-                 'max_use': float("inf"),
-                 'max_staleness': float("inf"),
-                 'alpha': 0.6,
-                 'beta': 0.4,
-                 'anneal_step': 100000,
-                 'enable_track_used_data': False,
-                 'deepcopy': False,
-                 'thruput_controller': {
-                     'push_sample_rate_limit': {
-                         'max': float("inf"),
-                         'min': 0
-                     },
-                     'window_seconds': 30,
-                     'sample_min_limit_ratio': 1
-                 },
-                 'monitor': {
-                     'sampled_data_attr': {
-                         'average_range': 5,
-                         'print_freq': 200
-                     },
-                     'periodic_thruput': {
-                         'seconds': 60
-                     }
-                 },
-                 'cfg_type': 'AdvancedReplayBufferDict'
+         'policy': {
+             'on_policy': False,
+             'cuda': True,
+             'multi_gpu': False,
+             'bp_update_sync': True,
+             'traj_len_inf': False,
+             'model': {
+                 'observation_shape': 4,
+                 'action_space_size': 2,
+                 'model_type': 'mlp',
+                 'lstm_hidden_size': 128,
+                 'latent_state_dim': 128,
+                 'self_supervised_learning_loss': True,
+                 'discrete_action_encoding_type': 'one_hot',
+                 'norm_type': 'BN'
             },
-             'commander': {
-                 'cfg_type': 'BaseSerialCommanderDict'
-             }
-         },
-         'on_policy': False,
-         'cuda': True,
-         'multi_gpu': False,
-         'bp_update_sync': True,
-         'traj_len_inf': False,
-         'use_rnd_model': False,
-         'sampled_algo': False,
-         'gumbel_algo': False,
-         'mcts_ctree': True,
-         'collector_env_num': 8,
-         'evaluator_env_num': 3,
-         'env_type': 'not_board_games',
-         'battle_mode': 'play_with_bot_mode',
-         'monitor_extra_statistics': True,
-         'game_segment_length': 50,
-         'transform2string': False,
-         'gray_scale': False,
-         'use_augmentation': False,
-         'augmentation': ['shift', 'intensity'],
-         'ignore_done': False,
-         'update_per_collect': 100,
-         'model_update_ratio': 0.1,
-         'batch_size': 256,
-         'optim_type': 'Adam',
-         'learning_rate': 0.003,
-         'target_update_freq': 100,
-         'target_update_freq_for_intrinsic_reward': 1000,
-         'weight_decay': 0.0001,
-         'momentum': 0.9,
-         'grad_clip_value': 10,
-         'n_episode': 8,
-         'num_simulations': 25,
-         'discount_factor': 0.997,
-         'td_steps': 5,
-         'num_unroll_steps': 5,
-         'reward_loss_weight': 1,
-         'value_loss_weight': 0.25,
-         'policy_loss_weight': 1,
-         'policy_entropy_loss_weight': 0,
-         'ssl_loss_weight': 2,
-         'lr_piecewise_constant_decay': False,
-         'threshold_training_steps_for_final_lr': 50000,
-         'manual_temperature_decay': False,
-         'threshold_training_steps_for_final_temperature': 100000,
-         'fixed_temperature_value': 0.25,
-         'use_ture_chance_label_in_chance_encoder': False,
-         'use_priority': True,
-         'priority_prob_alpha': 0.6,
-         'priority_prob_beta': 0.4,
-         'root_dirichlet_alpha': 0.3,
-         'root_noise_weight': 0.25,
-         'random_collect_episode_num': 0,
-         'eps': {
-             'eps_greedy_exploration_in_collect': False,
-             'type': 'linear',
-             'start': 1.0,
-             'end': 0.05,
-             'decay': 100000
+             'use_rnd_model': False,
+             'sampled_algo': False,
+             'gumbel_algo': False,
+             'mcts_ctree': True,
+             'collector_env_num': 8,
+             'evaluator_env_num': 3,
+             'env_type': 'not_board_games',
+             'battle_mode': 'play_with_bot_mode',
+             'monitor_extra_statistics': True,
+             'game_segment_length': 50,
+             'transform2string': False,
+             'gray_scale': False,
+             'use_augmentation': False,
+             'augmentation': ['shift', 'intensity'],
+             'ignore_done': False,
+             'update_per_collect': 100,
+             'model_update_ratio': 0.1,
+             'batch_size': 256,
+             'optim_type': 'Adam',
+             'learning_rate': 0.003,
+             'target_update_freq': 100,
+             'target_update_freq_for_intrinsic_reward': 1000,
+             'weight_decay': 0.0001,
+             'momentum': 0.9,
+             'grad_clip_value': 10,
+             'n_episode': 8,
+             'num_simulations': 25,
+             'discount_factor': 0.997,
+             'td_steps': 5,
+             'num_unroll_steps': 5,
+             'reward_loss_weight': 1,
+             'value_loss_weight': 0.25,
+             'policy_loss_weight': 1,
+             'policy_entropy_loss_weight': 0,
+             'ssl_loss_weight': 2,
+             'lr_piecewise_constant_decay': False,
+             'threshold_training_steps_for_final_lr': 50000,
+             'manual_temperature_decay': False,
+             'threshold_training_steps_for_final_temperature': 100000,
+             'fixed_temperature_value': 0.25,
+             'use_ture_chance_label_in_chance_encoder': False,
+             'use_priority': True,
+             'priority_prob_alpha': 0.6,
+             'priority_prob_beta': 0.4,
+             'root_dirichlet_alpha': 0.3,
+             'root_noise_weight': 0.25,
+             'random_collect_episode_num': 0,
+             'eps': {
+                 'eps_greedy_exploration_in_collect': False,
+                 'type': 'linear',
+                 'start': 1.0,
+                 'end': 0.05,
+                 'decay': 100000
+             },
+             'cfg_type': 'MuZeroPolicyDict',
+             'reanalyze_ratio': 0,
+             'eval_freq': 200,
+             'replay_buffer_size': 1000000
         },
-         'cfg_type': 'MuZeroPolicyDict',
-         'type': 'muzero',
-         'import_names': ['lzero.policy.muzero'],
-         'reanalyze_ratio': 0,
-         'eval_freq': 200,
-         'replay_buffer_size': 1000000,
-         'device': 'cuda'
+         'wandb_logger': {
+             'gradient_logger': False,
+             'video_logger': False,
+             'plot_logger': False,
+             'action_logger': False,
+             'return_logger': False
+         }
     },
-     'exp_name': 'CartPole-v0-MuZero',
-     'seed': 0,
-     'wandb_logger': {
-         'gradient_logger': False,
-         'video_logger': False,
-         'plot_logger': False,
-         'action_logger': False,
-         'return_logger': False
+     'create_config': {
+         'env': {
+             'type':
+             'cartpole_lightzero',
+             'import_names':
+             ['zoo.classic_control.cartpole.envs.cartpole_lightzero_env']
+         },
+         'env_manager': {
+             'type': 'subprocess'
+         },
+         'policy': {
+             'type': 'muzero',
+             'import_names': ['lzero.policy.muzero']
+         }
     }
  }
 
@@ -371,7 +279,7 @@ exp_config = {
  ## Model Information
  <!-- Provide the basic links for the model. -->
  - **Github Repository:** [repo link](https://github.com/opendilab/LightZero)
- - **Doc**: [DI-engine-docs Algorithm link](<TODO>)
+ - **Doc**: [Algorithm link](<TODO>)
  - **Configuration:** [config link](https://huggingface.co/OpenDILabCommunity/CartPole-v0-MuZero/blob/main/policy_config.py)
  - **Demo:** [video](https://huggingface.co/OpenDILabCommunity/CartPole-v0-MuZero/blob/main/replay.mp4)
  <!-- Provide the size information for the model. -->
@@ -385,4 +293,4 @@ exp_config = {
  - **Gym version:** 0.25.1
  - **DI-engine version:** v0.4.9
  - **PyTorch version:** 2.1.1+cu121
- - **Doc**: [DI-engine-docs Environments link](<TODO>)
+ - **Doc**: [Environments link](<TODO>)
 