{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 2.3724489212036133, "min": 2.35398006439209, "max": 3.295729160308838, "count": 200 }, "SoccerTwos.Policy.Entropy.sum": { "value": 46613.875, "min": 11777.2392578125, "max": 105463.28125, "count": 200 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 60.13414634146341, "min": 47.601941747572816, "max": 999.0, "count": 200 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19724.0, "min": 11628.0, "max": 30448.0, "count": 200 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1498.0635172021985, "min": 1200.7142360791336, "max": 1498.4165916460247, "count": 192 }, "SoccerTwos.Self-play.ELO.sum": { "value": 245682.41682116053, "min": 2403.259069081372, "max": 301467.75996944495, "count": 192 }, "SoccerTwos.Step.mean": { "value": 1999990.0, "min": 9242.0, "max": 1999990.0, "count": 200 }, "SoccerTwos.Step.sum": { "value": 1999990.0, "min": 9242.0, "max": 1999990.0, "count": 200 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.03040555864572525, "min": -0.04107595607638359, "max": 0.1871304214000702, "count": 200 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -4.956106185913086, "min": -7.475823879241943, "max": 28.900962829589844, "count": 200 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.025555307045578957, "min": -0.03806399181485176, "max": 0.19258709251880646, "count": 200 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -4.165514945983887, "min": -6.927646636962891, "max": 28.431549072265625, "count": 200 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 200 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 200 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.07679509019559147, "min": -0.7142857142857143, "max": 0.498006720001958, "count": 200 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -12.517599701881409, "min": -36.26460003852844, "max": 63.30120038986206, "count": 200 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.07679509019559147, "min": -0.7142857142857143, "max": 0.498006720001958, "count": 200 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -12.517599701881409, "min": -36.26460003852844, "max": 63.30120038986206, "count": 200 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 200 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 200 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.01417751678188021, "min": 0.012125546843162738, "max": 0.023403239927332228, "count": 95 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.01417751678188021, "min": 0.012125546843162738, "max": 0.023403239927332228, "count": 95 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.10028252626458804, "min": 0.0003658233804647656, "max": 0.10883518730600675, "count": 95 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.10028252626458804, "min": 0.0003658233804647656, "max": 0.10883518730600675, "count": 95 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.10191588525970777, "min": 0.0003673039436156008, "max": 0.11179724062482516, "count": 95 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.10191588525970777, "min": 0.0003673039436156008, "max": 0.11179724062482516, "count": 95 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 95 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 95 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 95 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000004, "max": 0.20000000000000007, "count": 95 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 95 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 95 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1678304692", "python_version": "3.9.16 (main, Mar 8 2023, 10:39:24) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "C:\\Users\\jacky\\.conda\\envs\\rl\\Scripts\\mlagents-learn C:\\rl\\ml-agents\\config\\poca\\SoccerTwos.yaml --env=C:\\rl\\ml-agents\\training-envs-executables\\SoccerTwos\\SoccerTwos.exe --run-id=SoccerTwos2Ma --no-graphics", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "1.12.1+cpu", "numpy_version": "1.21.2", "end_time_seconds": "1678311717" }, "total": 7024.9085793, "count": 1, "self": 0.3392129000003479, "children": { "run_training.setup": { "total": 0.11652259999999992, "count": 1, "self": 0.11652259999999992 }, "TrainerController.start_learning": { "total": 7024.4528438, "count": 1, "self": 4.133327600093253, "children": { "TrainerController._reset_env": { "total": 3.228551299999737, "count": 10, "self": 3.228551299999737 }, "TrainerController.advance": { "total": 7016.774306299907, "count": 133823, "self": 4.316766599808034, "children": { "env_step": { "total": 3422.673846700017, "count": 133823, "self": 2706.226638800106, "children": { "SubprocessEnvManager._take_step": { "total": 713.6143469999643, "count": 133823, "self": 23.942558899763412, "children": { "TorchPolicy.evaluate": { "total": 689.6717881002008, "count": 253760, "self": 689.6717881002008 } } }, "workers": { "total": 2.832860899946846, "count": 133823, "self": 0.0, "children": { "worker_root": { "total": 7015.33651089996, "count": 133823, "is_parallel": true, "self": 4809.827519199924, "children": { "steps_from_proto": { "total": 0.022868199999835248, "count": 20, "is_parallel": true, "self": 0.004605199995329823, "children": { "_process_rank_one_or_two_observation": { "total": 0.018263000004505425, "count": 80, "is_parallel": true, "self": 0.018263000004505425 } } }, "UnityEnvironment.step": { "total": 2205.4861235000367, "count": 133823, "is_parallel": true, "self": 107.46140500010642, "children": { "UnityEnvironment._generate_step_input": { "total": 82.49900079976463, "count": 133823, "is_parallel": true, "self": 82.49900079976463 }, "communicator.exchange": { "total": 1685.0467167998993, "count": 133823, "is_parallel": true, "self": 1685.0467167998993 }, "steps_from_proto": { "total": 330.47900090026644, "count": 267646, "is_parallel": true, "self": 67.6377834999011, "children": { "_process_rank_one_or_two_observation": { "total": 262.84121740036534, "count": 1070584, "is_parallel": true, "self": 262.84121740036534 } } } } } } } } } } }, "trainer_advance": { "total": 3589.7836930000817, "count": 133823, "self": 28.07474030005551, "children": { "process_trajectory": { "total": 593.3459357000256, "count": 133823, "self": 591.9375252000257, "children": { "RLTrainer._checkpoint": { "total": 1.4084104999999454, "count": 4, "self": 1.4084104999999454 } } }, "_update_policy": { "total": 2968.3630170000006, "count": 95, "self": 346.12241280002763, "children": { "TorchPOCAOptimizer.update": { "total": 2622.240604199973, "count": 2856, "self": 2622.240604199973 } } } } } } }, "trainer_threads": { "total": 1.2000000424450263e-06, "count": 1, "self": 1.2000000424450263e-06 }, "TrainerController._save_models": { "total": 0.31665739999971265, "count": 1, "self": 0.03600859999914974, "children": { "RLTrainer._checkpoint": { "total": 0.2806488000005629, "count": 1, "self": 0.2806488000005629 } } } } } } }