zjowowen committed
Commit dc3b74f
1 Parent(s): 3836aeb

Upload README.md with huggingface_hub

Files changed (1):
  1. README.md +110 -202

README.md CHANGED
@@ -21,7 +21,7 @@ model-index:
        type: CartPole-v0
      metrics:
      - type: mean_reward
-       value: 195.5 +/- 4.18
+       value: 200.0 +/- 0.0
        name: mean_reward
  ---
 
@@ -142,7 +142,7 @@ push_model_to_hub(
      usage_file_by_huggingface_ding="./muzero/cartpole_muzero_download.py",
      train_file="./muzero/cartpole_muzero.py",
      repo_id="OpenDILabCommunity/CartPole-v0-MuZero",
-     create_repo=True
+     create_repo=False
  )
 
  ```
@@ -155,209 +155,117 @@ push_model_to_hub(
 
  ```python
  exp_config = {
-     'env': {
-         'manager': {
-             'episode_num': float("inf"),
-             'max_retry': 5,
-             'step_timeout': None,
-             'auto_reset': True,
-             'reset_timeout': None,
-             'retry_type': 'reset',
-             'retry_waiting_time': 0.1,
-             'shared_memory': False,
-             'copy_on_get': True,
-             'context': 'fork',
-             'wait_num': float("inf"),
-             'step_wait_timeout': None,
-             'connect_timeout': 60,
-             'reset_inplace': False,
-             'cfg_type': 'SyncSubprocessEnvManagerDict',
-             'type': 'subprocess'
-         },
-         'stop_value':
-         10000000000,
-         'n_evaluator_episode':
-         3,
-         'type':
-         'cartpole_lightzero',
-         'import_names':
-         ['zoo.classic_control.cartpole.envs.cartpole_lightzero_env'],
-         'env_id':
-         'CartPole-v0',
-         'continuous':
-         False,
-         'manually_discretization':
-         False,
-         'replay_path':
-         '/tmp/tmp4kdr3rf1/videos'
-     },
-     'policy': {
-         'model': {
-             'model_type': 'mlp',
-             'continuous_action_space': False,
-             'observation_shape': 4,
-             'self_supervised_learning_loss': True,
-             'categorical_distribution': True,
-             'image_channel': 1,
-             'frame_stack_num': 1,
-             'num_res_blocks': 1,
-             'num_channels': 64,
-             'support_scale': 300,
-             'bias': True,
-             'discrete_action_encoding_type': 'one_hot',
-             'res_connection_in_dynamics': True,
-             'norm_type': 'BN',
-             'action_space_size': 2,
-             'lstm_hidden_size': 128,
-             'latent_state_dim': 128
-         },
-         'learn': {
-             'learner': {
-                 'train_iterations': 1000000000,
-                 'dataloader': {
-                     'num_workers': 0
-                 },
-                 'log_policy': True,
-                 'hook': {
-                     'load_ckpt_before_run': '',
-                     'log_show_after_iter': 100,
-                     'save_ckpt_after_iter': 10000,
-                     'save_ckpt_after_run': True
-                 },
-                 'cfg_type': 'BaseLearnerDict'
-             }
-         },
-         'collect': {
-             'collector': {
-                 'deepcopy_obs': False,
-                 'transform_obs': False,
-                 'collect_print_freq': 100,
-                 'cfg_type': 'SampleSerialCollectorDict',
-                 'type': 'sample'
-             }
-         },
-         'eval': {
-             'evaluator': {
-                 'eval_freq': 1000,
-                 'render': {
-                     'render_freq': -1,
-                     'mode': 'train_iter'
-                 },
-                 'figure_path': None,
-                 'cfg_type': 'InteractionSerialEvaluatorDict',
-                 'stop_value': 10000000000,
-                 'n_episode': 3
+     'main_config': {
+         'exp_name': 'CartPole-v0-MuZero',
+         'seed': 0,
+         'env': {
+             'env_id': 'CartPole-v0',
+             'continuous': False,
+             'manually_discretization': False,
+             'collector_env_num': 8,
+             'evaluator_env_num': 3,
+             'n_evaluator_episode': 3,
+             'manager': {
+                 'shared_memory': False
              }
          },
-         'other': {
-             'replay_buffer': {
-                 'type': 'advanced',
-                 'replay_buffer_size': 4096,
-                 'max_use': float("inf"),
-                 'max_staleness': float("inf"),
-                 'alpha': 0.6,
-                 'beta': 0.4,
-                 'anneal_step': 100000,
-                 'enable_track_used_data': False,
-                 'deepcopy': False,
-                 'thruput_controller': {
-                     'push_sample_rate_limit': {
-                         'max': float("inf"),
-                         'min': 0
-                     },
-                     'window_seconds': 30,
-                     'sample_min_limit_ratio': 1
-                 },
-                 'monitor': {
-                     'sampled_data_attr': {
-                         'average_range': 5,
-                         'print_freq': 200
-                     },
-                     'periodic_thruput': {
-                         'seconds': 60
-                     }
-                 },
-                 'cfg_type': 'AdvancedReplayBufferDict'
+         'policy': {
+             'on_policy': False,
+             'cuda': True,
+             'multi_gpu': False,
+             'bp_update_sync': True,
+             'traj_len_inf': False,
+             'model': {
+                 'observation_shape': 4,
+                 'action_space_size': 2,
+                 'model_type': 'mlp',
+                 'lstm_hidden_size': 128,
+                 'latent_state_dim': 128,
+                 'self_supervised_learning_loss': True,
+                 'discrete_action_encoding_type': 'one_hot',
+                 'norm_type': 'BN'
             },
-             'commander': {
-                 'cfg_type': 'BaseSerialCommanderDict'
-             }
-         },
-         'on_policy': False,
-         'cuda': True,
-         'multi_gpu': False,
-         'bp_update_sync': True,
-         'traj_len_inf': False,
-         'use_rnd_model': False,
-         'sampled_algo': False,
-         'gumbel_algo': False,
-         'mcts_ctree': True,
-         'collector_env_num': 8,
-         'evaluator_env_num': 3,
-         'env_type': 'not_board_games',
-         'battle_mode': 'play_with_bot_mode',
-         'monitor_extra_statistics': True,
-         'game_segment_length': 50,
-         'transform2string': False,
-         'gray_scale': False,
-         'use_augmentation': False,
-         'augmentation': ['shift', 'intensity'],
-         'ignore_done': False,
-         'update_per_collect': 100,
-         'model_update_ratio': 0.1,
-         'batch_size': 256,
-         'optim_type': 'Adam',
-         'learning_rate': 0.003,
-         'target_update_freq': 100,
-         'target_update_freq_for_intrinsic_reward': 1000,
-         'weight_decay': 0.0001,
-         'momentum': 0.9,
-         'grad_clip_value': 10,
-         'n_episode': 8,
-         'num_simulations': 25,
-         'discount_factor': 0.997,
-         'td_steps': 5,
-         'num_unroll_steps': 5,
-         'reward_loss_weight': 1,
-         'value_loss_weight': 0.25,
-         'policy_loss_weight': 1,
-         'policy_entropy_loss_weight': 0,
-         'ssl_loss_weight': 2,
-         'lr_piecewise_constant_decay': False,
-         'threshold_training_steps_for_final_lr': 50000,
-         'manual_temperature_decay': False,
-         'threshold_training_steps_for_final_temperature': 100000,
-         'fixed_temperature_value': 0.25,
-         'use_ture_chance_label_in_chance_encoder': False,
-         'use_priority': True,
-         'priority_prob_alpha': 0.6,
-         'priority_prob_beta': 0.4,
-         'root_dirichlet_alpha': 0.3,
-         'root_noise_weight': 0.25,
-         'random_collect_episode_num': 0,
-         'eps': {
-             'eps_greedy_exploration_in_collect': False,
-             'type': 'linear',
-             'start': 1.0,
-             'end': 0.05,
-             'decay': 100000
+             'use_rnd_model': False,
+             'sampled_algo': False,
+             'gumbel_algo': False,
+             'mcts_ctree': True,
+             'collector_env_num': 8,
+             'evaluator_env_num': 3,
+             'env_type': 'not_board_games',
+             'battle_mode': 'play_with_bot_mode',
+             'monitor_extra_statistics': True,
+             'game_segment_length': 50,
+             'transform2string': False,
+             'gray_scale': False,
+             'use_augmentation': False,
+             'augmentation': ['shift', 'intensity'],
+             'ignore_done': False,
+             'update_per_collect': 100,
+             'model_update_ratio': 0.1,
+             'batch_size': 256,
+             'optim_type': 'Adam',
+             'learning_rate': 0.003,
+             'target_update_freq': 100,
+             'target_update_freq_for_intrinsic_reward': 1000,
+             'weight_decay': 0.0001,
+             'momentum': 0.9,
+             'grad_clip_value': 10,
+             'n_episode': 8,
+             'num_simulations': 25,
+             'discount_factor': 0.997,
+             'td_steps': 5,
+             'num_unroll_steps': 5,
+             'reward_loss_weight': 1,
+             'value_loss_weight': 0.25,
+             'policy_loss_weight': 1,
+             'policy_entropy_loss_weight': 0,
+             'ssl_loss_weight': 2,
+             'lr_piecewise_constant_decay': False,
+             'threshold_training_steps_for_final_lr': 50000,
+             'manual_temperature_decay': False,
+             'threshold_training_steps_for_final_temperature': 100000,
+             'fixed_temperature_value': 0.25,
+             'use_ture_chance_label_in_chance_encoder': False,
+             'use_priority': True,
+             'priority_prob_alpha': 0.6,
+             'priority_prob_beta': 0.4,
+             'root_dirichlet_alpha': 0.3,
+             'root_noise_weight': 0.25,
+             'random_collect_episode_num': 0,
+             'eps': {
+                 'eps_greedy_exploration_in_collect': False,
+                 'type': 'linear',
+                 'start': 1.0,
+                 'end': 0.05,
+                 'decay': 100000
+             },
+             'cfg_type': 'MuZeroPolicyDict',
+             'reanalyze_ratio': 0,
+             'eval_freq': 200,
+             'replay_buffer_size': 1000000
         },
-         'cfg_type': 'MuZeroPolicyDict',
-         'type': 'muzero',
-         'import_names': ['lzero.policy.muzero'],
-         'reanalyze_ratio': 0,
-         'eval_freq': 200,
-         'replay_buffer_size': 1000000,
-         'device': 'cuda'
+         'wandb_logger': {
+             'gradient_logger': False,
+             'video_logger': False,
+             'plot_logger': False,
+             'action_logger': False,
+             'return_logger': False
+         }
     },
-     'exp_name': 'CartPole-v0-MuZero',
-     'seed': 0,
-     'wandb_logger': {
-         'gradient_logger': False,
-         'video_logger': False,
-         'plot_logger': False,
-         'action_logger': False,
-         'return_logger': False
+     'create_config': {
+         'env': {
+             'type':
+             'cartpole_lightzero',
+             'import_names':
+             ['zoo.classic_control.cartpole.envs.cartpole_lightzero_env']
+         },
+         'env_manager': {
+             'type': 'subprocess'
+         },
+         'policy': {
+             'type': 'muzero',
+             'import_names': ['lzero.policy.muzero']
+         }
     }
  }
 
@@ -371,7 +279,7 @@ exp_config = {
  ## Model Information
  <!-- Provide the basic links for the model. -->
  - **Github Repository:** [repo link](https://github.com/opendilab/LightZero)
- - **Doc**: [DI-engine-docs Algorithm link](<TODO>)
+ - **Doc**: [Algorithm link](<TODO>)
  - **Configuration:** [config link](https://huggingface.co/OpenDILabCommunity/CartPole-v0-MuZero/blob/main/policy_config.py)
  - **Demo:** [video](https://huggingface.co/OpenDILabCommunity/CartPole-v0-MuZero/blob/main/replay.mp4)
  <!-- Provide the size information for the model. -->
@@ -385,4 +293,4 @@ exp_config = {
  - **Gym version:** 0.25.1
  - **DI-engine version:** v0.4.9
  - **PyTorch version:** 2.1.1+cu121
- - **Doc**: [DI-engine-docs Environments link](<TODO>)
+ - **Doc**: [Environments link](<TODO>)
 