{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.8996624946594238, "min": 1.886587142944336, "max": 2.0112533569335938, "count": 61 }, "SoccerTwos.Policy.Entropy.sum": { "value": 77019.9140625, "min": 74277.203125, "max": 83170.4296875, "count": 61 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 44.34703196347032, "min": 39.54251012145749, "max": 52.645161290322584, "count": 61 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 38848.0, "min": 38620.0, "max": 39760.0, "count": 61 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1595.5836355998229, "min": 1581.8719700331785, "max": 1629.204667611585, "count": 61 }, "SoccerTwos.Self-play.ELO.sum": { "value": 698865.6323927224, "min": 589617.790705327, "max": 789016.2937724582, "count": 61 }, "SoccerTwos.Step.mean": { "value": 11219996.0, "min": 10019919.0, "max": 11219996.0, "count": 61 }, "SoccerTwos.Step.sum": { "value": 11219996.0, "min": 10019919.0, "max": 11219996.0, "count": 61 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.00622489582747221, "min": -0.05283130705356598, "max": 0.04666583240032196, "count": 61 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 2.726504325866699, "min": -24.091075897216797, "max": 20.672964096069336, "count": 61 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.0041276742704212666, "min": -0.05068834871053696, "max": 0.04481092467904091, "count": 61 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 1.8079214096069336, "min": -23.113887786865234, "max": 19.851240158081055, "count": 61 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 61 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 61 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.03666027441416701, "min": -0.1891298249625323, "max": 0.12896794740018133, "count": 61 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -16.05720019340515, "min": -86.24320018291473, "max": 57.132800698280334, "count": 61 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.03666027441416701, "min": -0.1891298249625323, "max": 0.12896794740018133, "count": 61 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -16.05720019340515, "min": -86.24320018291473, "max": 57.132800698280334, "count": 61 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 61 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 61 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.018658186582964843, "min": 0.017024516874807886, "max": 0.01911724130413495, "count": 11 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.018658186582964843, "min": 0.017024516874807886, "max": 0.01911724130413495, "count": 11 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.08370085209608077, "min": 0.0768383824378252, "max": 0.08370085209608077, "count": 11 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.08370085209608077, "min": 0.0768383824378252, "max": 0.08370085209608077, "count": 11 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.0886788744032383, "min": 0.08260714703798294, "max": 0.08937660717964173, "count": 11 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.0886788744032383, "min": 0.08260714703798294, "max": 0.08937660717964173, "count": 11 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 7.202685482541537e-05, "min": 7.202685482541537e-05, "max": 0.00011143723155872304, "count": 11 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 7.202685482541537e-05, "min": 7.202685482541537e-05, "max": 0.00011143723155872304, "count": 11 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.11440535384615381, "min": 0.11440535384615381, "max": 0.12228743076923076, "count": 11 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.11440535384615381, "min": 0.11440535384615381, "max": 0.12228743076923076, "count": 11 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.0007288271569230769, "min": 0.0007288271569230769, "max": 0.0011221427953846152, "count": 11 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.0007288271569230769, "min": 0.0007288271569230769, "max": 0.0011221427953846152, "count": 11 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1733596907", "python_version": "3.10.12 (main, Jul 5 2023, 15:34:07) [Clang 14.0.6 ]", "command_line_arguments": "/Users/mango/anaconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos-v2 --no-graphics --resume", "mlagents_version": "1.2.0.dev0", "mlagents_envs_version": "1.2.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.2.2", "numpy_version": "1.23.5", "end_time_seconds": "1733606074" }, "total": 9167.079563773994, "count": 1, "self": 1.219165108006564, "children": { "run_training.setup": { "total": 0.042149560002144426, "count": 1, "self": 0.042149560002144426 }, "TrainerController.start_learning": { "total": 9165.818249105985, "count": 1, "self": 1.8252556156221544, "children": { "TrainerController._reset_env": { "total": 5.141387626994401, "count": 6, "self": 5.141387626994401 }, "TrainerController.advance": { "total": 9158.619714393368, "count": 87899, "self": 1.6758576652646298, "children": { "env_step": { "total": 6379.429527955668, "count": 87899, "self": 6162.35989666915, "children": { "SubprocessEnvManager._take_step": { "total": 215.99093429616187, "count": 87899, "self": 8.583292969182367, "children": { "TorchPolicy.evaluate": { "total": 207.4076413269795, "count": 154384, "self": 207.4076413269795 } } }, "workers": { "total": 1.0786969903565478, "count": 87898, "self": 0.0, "children": { "worker_root": { "total": 9158.916076981273, "count": 87898, "is_parallel": true, "self": 3180.6766884052486, "children": { "steps_from_proto": { "total": 0.01726108498405665, "count": 12, "is_parallel": true, "self": 0.002374373027123511, "children": { "_process_rank_one_or_two_observation": { "total": 0.01488671195693314, "count": 48, "is_parallel": true, "self": 0.01488671195693314 } } }, "UnityEnvironment.step": { "total": 5978.222127491041, "count": 87898, "is_parallel": true, "self": 18.891270804422675, "children": { "UnityEnvironment._generate_step_input": { "total": 120.4005277333199, "count": 87898, "is_parallel": true, "self": 120.4005277333199 }, "communicator.exchange": { "total": 5619.16723036037, "count": 87898, "is_parallel": true, "self": 5619.16723036037 }, "steps_from_proto": { "total": 219.76309859292815, "count": 175796, "is_parallel": true, "self": 29.732495062737144, "children": { "_process_rank_one_or_two_observation": { "total": 190.030603530191, "count": 703184, "is_parallel": true, "self": 190.030603530191 } } } } } } } } } } }, "trainer_advance": { "total": 2777.514328772435, "count": 87898, "self": 11.803366322768852, "children": { "process_trajectory": { "total": 275.05257099067967, "count": 87898, "self": 274.4803247986856, "children": { "RLTrainer._checkpoint": { "total": 0.5722461919940542, "count": 2, "self": 0.5722461919940542 } } }, "_update_policy": { "total": 2490.6583914589864, "count": 12, "self": 267.51975328261324, "children": { "TorchPOCAOptimizer.update": { "total": 2223.138638176373, "count": 3000, "self": 2223.138638176373 } } } } } } }, "trainer_threads": { "total": 9.280047379434109e-07, "count": 1, "self": 9.280047379434109e-07 }, "TrainerController._save_models": { "total": 0.23189054199610837, "count": 1, "self": 0.002295552985742688, "children": { "RLTrainer._checkpoint": { "total": 0.22959498901036568, "count": 1, "self": 0.22959498901036568 } } } } } } }