{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.871659755706787, "min": 1.785607099533081, "max": 3.218742609024048, "count": 1699 }, "SoccerTwos.Policy.Entropy.sum": { "value": 39709.1328125, "min": 17657.279296875, "max": 129650.1953125, "count": 1699 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 84.1, "min": 61.675, "max": 999.0, "count": 1699 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 20184.0, "min": 2764.0, "max": 33472.0, "count": 1699 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1638.7703722534197, "min": 1182.9638676306631, "max": 1681.307426112181, "count": 1349 }, "SoccerTwos.Self-play.ELO.sum": { "value": 196652.44467041036, "min": 2365.9277352613262, "max": 243281.47223582875, "count": 1349 }, "SoccerTwos.Step.mean": { "value": 17479886.0, "min": 499888.0, "max": 17479886.0, "count": 1699 }, "SoccerTwos.Step.sum": { "value": 17479886.0, "min": 499888.0, "max": 17479886.0, "count": 1699 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.09041914343833923, "min": -0.09385305643081665, "max": 0.11160458624362946, "count": 1699 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -10.759878158569336, "min": -12.537370681762695, "max": 14.396991729736328, "count": 1699 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.09380974620580673, "min": -0.09380974620580673, "max": 0.11068973690271378, "count": 1699 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -11.163359642028809, "min": -12.2941255569458, "max": 14.278976440429688, "count": 1699 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1699 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1699 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.41249075657179374, "min": -1.0, "max": 0.4748181809078563, "count": 1699 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -49.08640003204346, "min": -50.36000007390976, "max": 49.83799999952316, "count": 1699 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.41249075657179374, "min": -1.0, "max": 0.4748181809078563, "count": 1699 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -49.08640003204346, "min": -50.36000007390976, "max": 49.83799999952316, "count": 1699 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1699 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1699 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.020723478798754514, "min": 0.010463065703834217, "max": 0.024784044610957304, "count": 806 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.020723478798754514, "min": 0.010463065703834217, "max": 0.024784044610957304, "count": 806 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.08757540360093116, "min": 4.064445858394568e-08, "max": 0.08757540360093116, "count": 806 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.08757540360093116, "min": 4.064445858394568e-08, "max": 0.08757540360093116, "count": 806 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.08946202173829079, "min": 4.1550603668838446e-08, "max": 0.08946202173829079, "count": 806 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.08946202173829079, "min": 4.1550603668838446e-08, "max": 0.08946202173829079, "count": 806 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 806 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 806 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 806 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 806 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 806 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 806 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1682413086", "python_version": "3.9.16 (main, Mar 8 2023, 10:39:24) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "C:\\Users\\SumitKumar\\.conda\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics --resume", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.0.0+cpu", "numpy_version": "1.21.2", "end_time_seconds": "1682475904" }, "total": 62817.7116879, "count": 1, "self": 1.1739598000058322, "children": { "run_training.setup": { "total": 0.14583369999999984, "count": 1, "self": 0.14583369999999984 }, "TrainerController.start_learning": { "total": 62816.391894399996, "count": 1, "self": 28.509888400541968, "children": { "TrainerController._reset_env": { "total": 11.71086960001029, "count": 86, "self": 11.71086960001029 }, "TrainerController.advance": { "total": 62775.96234499945, "count": 1124182, "self": 32.05896769843821, "children": { "env_step": { "total": 23495.912942799187, "count": 1124182, "self": 18609.023699891215, "children": { "SubprocessEnvManager._take_step": { "total": 4867.3857627047, "count": 1124182, "self": 182.58787870796095, "children": { "TorchPolicy.evaluate": { "total": 4684.797883996739, "count": 2160186, "self": 4684.797883996739 } } }, "workers": { "total": 19.503480203272872, "count": 1124181, "self": 0.0, "children": { "worker_root": { "total": 62765.65317139514, "count": 1124181, "is_parallel": true, "self": 47758.5412185937, "children": { "steps_from_proto": { "total": 0.1711057999539296, "count": 172, "is_parallel": true, "self": 0.035403900038018854, "children": { "_process_rank_one_or_two_observation": { "total": 0.13570189991591075, "count": 688, "is_parallel": true, "self": 0.13570189991591075 } } }, "UnityEnvironment.step": { "total": 15006.940847001486, "count": 1124181, "is_parallel": true, "self": 784.5641267931096, "children": { "UnityEnvironment._generate_step_input": { "total": 644.4703808011335, "count": 1124181, "is_parallel": true, "self": 644.4703808011335 }, "communicator.exchange": { "total": 11000.886401400896, "count": 1124181, "is_parallel": true, "self": 11000.886401400896 }, "steps_from_proto": { "total": 2577.0199380063477, "count": 2248362, "is_parallel": true, "self": 505.2159105033397, "children": { "_process_rank_one_or_two_observation": { "total": 2071.804027503008, "count": 8993448, "is_parallel": true, "self": 2071.804027503008 } } } } } } } } } } }, "trainer_advance": { "total": 39247.99043450182, "count": 1124181, "self": 206.47912999861728, "children": { "process_trajectory": { "total": 4316.292860203179, "count": 1124181, "self": 4310.968071203197, "children": { "RLTrainer._checkpoint": { "total": 5.324788999981692, "count": 34, "self": 5.324788999981692 } } }, "_update_policy": { "total": 34725.21844430002, "count": 807, "self": 2688.914494900011, "children": { "TorchPOCAOptimizer.update": { "total": 32036.30394940001, "count": 24228, "self": 32036.30394940001 } } } } } } }, "trainer_threads": { "total": 2.3999964469112456e-06, "count": 1, "self": 2.3999964469112456e-06 }, "TrainerController._save_models": { "total": 0.20878900000388967, "count": 1, "self": 0.0442892999999458, "children": { "RLTrainer._checkpoint": { "total": 0.16449970000394387, "count": 1, "self": 0.16449970000394387 } } } } } } }