{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.4010918140411377, "min": 1.2090040445327759, "max": 3.2957358360290527, "count": 5000 }, "SoccerTwos.Policy.Entropy.sum": { "value": 28515.01953125, "min": 6897.060546875, "max": 128965.90625, "count": 5000 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 73.30882352941177, "min": 38.944, "max": 999.0, "count": 5000 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19940.0, "min": 15952.0, "max": 27440.0, "count": 5000 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1561.1694317211543, "min": 1184.5423535370517, "max": 1703.3394789078657, "count": 4981 }, "SoccerTwos.Self-play.ELO.sum": { "value": 212319.042714077, "min": 2372.9150272376246, "max": 383157.7328610997, "count": 4981 }, "SoccerTwos.Step.mean": { "value": 49999893.0, "min": 9738.0, "max": 49999893.0, "count": 5000 }, "SoccerTwos.Step.sum": { "value": 49999893.0, "min": 9738.0, "max": 49999893.0, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.013989397324621677, "min": -0.14385704696178436, "max": 0.2161293774843216, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -1.9025580883026123, "min": -25.043373107910156, "max": 37.83807373046875, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.014541653916239738, "min": -0.14292679727077484, "max": 0.22378826141357422, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -1.9776649475097656, "min": -25.343151092529297, "max": 36.21299743652344, "count": 5000 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5000 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.19585000066196218, "min": -0.5714285714285714, "max": 0.43453530003042784, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 26.635600090026855, "min": -71.6507995724678, "max": 65.79379975795746, "count": 5000 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.19585000066196218, "min": -0.5714285714285714, "max": 0.43453530003042784, "count": 5000 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 26.635600090026855, "min": -71.6507995724678, "max": 65.79379975795746, "count": 5000 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5000 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5000 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.018197235686238857, "min": 0.010609339728641013, "max": 0.025581471160209427, "count": 2423 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.018197235686238857, "min": 0.010609339728641013, "max": 0.025581471160209427, "count": 2423 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.09603026360273362, "min": 0.0005334322918012428, "max": 0.1283156047264735, "count": 2423 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.09603026360273362, "min": 0.0005334322918012428, "max": 0.1283156047264735, "count": 2423 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.0971205140153567, "min": 0.000539599459443707, "max": 0.13175269613663357, "count": 2423 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.0971205140153567, "min": 0.000539599459443707, "max": 0.13175269613663357, "count": 2423 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2423 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2423 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 2423 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 2423 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 2423 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 2423 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1732263812", "python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:34:57) [MSC v.1936 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\hxk\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=D:\\workspace\\marl\\training-envs-executables\\SoccerTwos\\SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.1.0", "mlagents_envs_version": "1.1.0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.5.1+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1732396614" }, "total": 132801.7768022, "count": 1, "self": 5.712018599966541, "children": { "run_training.setup": { "total": 0.13216770003782585, "count": 1, "self": 0.13216770003782585 }, "TrainerController.start_learning": { "total": 132795.9326159, "count": 1, "self": 87.48552955809282, "children": { "TrainerController._reset_env": { "total": 12.636281599581707, "count": 250, "self": 12.636281599581707 }, "TrainerController.advance": { "total": 132695.62327434233, "count": 3461546, "self": 83.77715355256805, "children": { "env_step": { "total": 58312.71861225262, "count": 3461546, "self": 45649.65502013004, "children": { "SubprocessEnvManager._take_step": { "total": 12611.22084773914, "count": 3461546, "self": 446.1160681156325, "children": { "TorchPolicy.evaluate": { "total": 12165.104779623507, "count": 6280736, "self": 12165.104779623507 } } }, "workers": { "total": 51.84274438343709, "count": 3461546, "self": 0.0, "children": { "worker_root": { "total": 132680.80477682757, "count": 3461546, "is_parallel": true, "self": 96614.891509638, "children": { "steps_from_proto": { "total": 0.40552329987986013, "count": 500, "is_parallel": true, "self": 0.08666900132084265, "children": { "_process_rank_one_or_two_observation": { "total": 0.3188542985590175, "count": 2000, "is_parallel": true, "self": 0.3188542985590175 } } }, "UnityEnvironment.step": { "total": 36065.5077438897, "count": 3461546, "is_parallel": true, "self": 1656.977937828051, "children": { "UnityEnvironment._generate_step_input": { "total": 1602.902425405162, "count": 3461546, "is_parallel": true, "self": 1602.902425405162 }, "communicator.exchange": { "total": 27286.42274631752, "count": 3461546, "is_parallel": true, "self": 27286.42274631752 }, "steps_from_proto": { "total": 5519.204634338967, "count": 6923092, "is_parallel": true, "self": 1196.7695126156905, "children": { "_process_rank_one_or_two_observation": { "total": 4322.435121723276, "count": 27692368, "is_parallel": true, "self": 4322.435121723276 } } } } } } } } } } }, "trainer_advance": { "total": 74299.12750853715, "count": 3461546, "self": 545.013101782999, "children": { "process_trajectory": { "total": 12855.483528753917, "count": 3461546, "self": 12843.019238753594, "children": { "RLTrainer._checkpoint": { "total": 12.464290000323672, "count": 100, "self": 12.464290000323672 } } }, "_update_policy": { "total": 60898.63087800023, "count": 2423, "self": 6641.375867509341, "children": { "TorchPOCAOptimizer.update": { "total": 54257.25501049089, "count": 72690, "self": 54257.25501049089 } } } } } } }, "trainer_threads": { "total": 1.300009898841381e-06, "count": 1, "self": 1.300009898841381e-06 }, "TrainerController._save_models": { "total": 0.1875290999887511, "count": 1, "self": 0.054237899952568114, "children": { "RLTrainer._checkpoint": { "total": 0.133291200036183, "count": 1, "self": 0.133291200036183 } } } } } } }