{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.3342314958572388, "min": 1.2213366031646729, "max": 3.2957451343536377, "count": 5000 }, "SoccerTwos.Policy.Entropy.sum": { "value": 27495.84375, "min": 7687.36572265625, "max": 128714.1171875, "count": 5000 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 70.7536231884058, "min": 39.24, "max": 999.0, "count": 5000 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19528.0, "min": 9696.0, "max": 31792.0, "count": 5000 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1811.7246262937394, "min": 1179.332189637791, "max": 1848.9829111352722, "count": 4982 }, "SoccerTwos.Self-play.ELO.sum": { "value": 250017.99842853606, "min": 2358.664379275582, "max": 413255.7560197812, "count": 4982 }, "SoccerTwos.Step.mean": { "value": 49999990.0, "min": 9916.0, "max": 49999990.0, "count": 5000 }, "SoccerTwos.Step.sum": { "value": 49999990.0, "min": 9916.0, "max": 49999990.0, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.043447643518447876, "min": -0.1507469117641449, "max": 0.19217097759246826, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -5.995774745941162, "min": -27.212013244628906, "max": 27.480449676513672, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.04276978597044945, "min": -0.1481342911720276, "max": 0.18647614121437073, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -5.902230262756348, "min": -27.620769500732422, "max": 26.666088104248047, "count": 5000 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5000 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.1321536242098048, "min": -0.6966782600983329, "max": 0.4442099160399319, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -18.237200140953064, "min": -68.53999996185303, "max": 66.38519954681396, "count": 5000 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.1321536242098048, "min": -0.6966782600983329, "max": 0.4442099160399319, "count": 5000 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -18.237200140953064, "min": -68.53999996185303, "max": 66.38519954681396, "count": 5000 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5000 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5000 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.015515439103667934, "min": 0.009302581998781534, "max": 0.026314172179748616, "count": 2425 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.015515439103667934, "min": 0.009302581998781534, "max": 0.026314172179748616, "count": 2425 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.09377614806095759, "min": 3.53941756960315e-05, "max": 0.12944712241490683, "count": 2425 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.09377614806095759, "min": 3.53941756960315e-05, "max": 0.12944712241490683, "count": 2425 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.0945092978576819, "min": 3.493615307282501e-05, "max": 0.1318600905438264, "count": 2425 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.0945092978576819, "min": 3.493615307282501e-05, "max": 0.1318600905438264, "count": 2425 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2425 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2425 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.2, "max": 0.20000000000000007, "count": 2425 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.2, "max": 0.20000000000000007, "count": 2425 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 2425 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 2425 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1675447596", "python_version": "3.9.16 (main, Dec 7 2022, 01:11:51) \n[GCC 9.4.0]", "command_line_arguments": "/home/oren/projects/hf_rl/venv/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics --force", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "1.11.0+cu102", "numpy_version": "1.21.2", "end_time_seconds": "1675558010" }, "total": 110413.58382740001, "count": 1, "self": 0.21909820000291802, "children": { "run_training.setup": { "total": 0.006499499999335967, "count": 1, "self": 0.006499499999335967 }, "TrainerController.start_learning": { "total": 110413.35822970001, "count": 1, "self": 57.25071839807788, "children": { "TrainerController._reset_env": { "total": 3.874675999990359, "count": 250, "self": 3.874675999990359 }, "TrainerController.advance": { "total": 110352.12335780196, "count": 3454175, "self": 56.69173418801802, "children": { "env_step": { "total": 52741.61932090802, "count": 3454175, "self": 45825.18188282399, "children": { "SubprocessEnvManager._take_step": { "total": 6884.16374929278, "count": 3454175, "self": 269.21204758729255, "children": { "TorchPolicy.evaluate": { "total": 6614.951701705488, "count": 6280554, "self": 6614.951701705488 } } }, "workers": { "total": 32.273688791252425, "count": 3454175, "self": 0.0, "children": { "worker_root": { "total": 110283.97509838498, "count": 3454175, "is_parallel": true, "self": 70175.64805238618, "children": { "run_training.setup": { "total": 0.0, "count": 0, "is_parallel": true, "self": 0.0, "children": { "steps_from_proto": { "total": 0.0015819000018382212, "count": 2, "is_parallel": true, "self": 0.000414699999964796, "children": { "_process_rank_one_or_two_observation": { "total": 0.0011672000018734252, "count": 8, "is_parallel": true, "self": 0.0011672000018734252 } } }, "UnityEnvironment.step": { "total": 0.021848700000191457, "count": 1, "is_parallel": true, "self": 0.00047259999882953707, "children": { "UnityEnvironment._generate_step_input": { "total": 0.0003153000016027363, "count": 1, "is_parallel": true, "self": 0.0003153000016027363 }, "communicator.exchange": { "total": 0.019867099999828497, "count": 1, "is_parallel": true, "self": 0.019867099999828497 }, "steps_from_proto": { "total": 0.0011936999999306863, "count": 2, "is_parallel": true, "self": 0.0002587000017229002, "children": { "_process_rank_one_or_two_observation": { "total": 0.0009349999982077861, "count": 8, "is_parallel": true, "self": 0.0009349999982077861 } } } } } } }, "UnityEnvironment.step": { "total": 40108.009829298775, "count": 3454174, "is_parallel": true, "self": 1337.0156180963677, "children": { "UnityEnvironment._generate_step_input": { "total": 892.7294806105601, "count": 3454174, "is_parallel": true, "self": 892.7294806105601 }, "communicator.exchange": { "total": 33880.327741501984, "count": 3454174, "is_parallel": true, "self": 33880.327741501984 }, "steps_from_proto": { "total": 3997.9369890898597, "count": 6908348, "is_parallel": true, "self": 854.8218988004282, "children": { "_process_rank_one_or_two_observation": { "total": 3143.1150902894315, "count": 27633392, "is_parallel": true, "self": 3143.1150902894315 } } } } }, "steps_from_proto": { "total": 0.31721670002843894, "count": 498, "is_parallel": true, "self": 0.06564850001450395, "children": { "_process_rank_one_or_two_observation": { "total": 0.251568200013935, "count": 1992, "is_parallel": true, "self": 0.251568200013935 } } } } } } } } }, "trainer_advance": { "total": 57553.81230270592, "count": 3454175, "self": 363.2739607166295, "children": { "process_trajectory": { "total": 6883.052212688999, "count": 3454175, "self": 6871.7666881889545, "children": { "RLTrainer._checkpoint": { "total": 11.28552450004463, "count": 100, "self": 11.28552450004463 } } }, "_update_policy": { "total": 50307.48612930029, "count": 2425, "self": 4582.695688700973, "children": { "TorchPOCAOptimizer.update": { "total": 45724.79044059932, "count": 72756, "self": 45724.79044059932 } } } } } } }, "trainer_threads": { "total": 1.579998934175819e-05, "count": 1, "self": 1.579998934175819e-05 }, "TrainerController._save_models": { "total": 0.10946169999078847, "count": 1, "self": 0.0007872999995015562, "children": { "RLTrainer._checkpoint": { "total": 0.10867439999128692, "count": 1, "self": 0.10867439999128692 } } } } } } }