{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.5572160482406616,
"min": 1.4796764850616455,
"max": 2.316222667694092,
"count": 692
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 32439.92578125,
"min": 19549.421875,
"max": 51443.2890625,
"count": 692
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 60.79012345679013,
"min": 37.40816326530612,
"max": 112.68888888888888,
"count": 692
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19696.0,
"min": 7332.0,
"max": 20892.0,
"count": 692
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1532.850168432362,
"min": 1393.8570641808913,
"max": 1556.063024196752,
"count": 692
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 248321.72728604265,
"min": 136472.0477161022,
"max": 363256.00183768506,
"count": 692
},
"SoccerTwos.Step.mean": {
"value": 10159890.0,
"min": 3249918.0,
"max": 10159890.0,
"count": 692
},
"SoccerTwos.Step.sum": {
"value": 10159890.0,
"min": 3249918.0,
"max": 10159890.0,
"count": 692
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.017411012202501297,
"min": -0.11417428404092789,
"max": 0.15166069567203522,
"count": 692
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -2.8031728267669678,
"min": -22.35116958618164,
"max": 24.698177337646484,
"count": 692
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.021648546680808067,
"min": -0.11651396751403809,
"max": 0.1547374129295349,
"count": 692
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -3.4854159355163574,
"min": -22.662694931030273,
"max": 25.887794494628906,
"count": 692
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 692
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 692
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.08805714334760394,
"min": -0.31483267262430475,
"max": 0.3760611441484682,
"count": 692
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -14.177200078964233,
"min": -66.06939989328384,
"max": 63.40679979324341,
"count": 692
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.08805714334760394,
"min": -0.31483267262430475,
"max": 0.3760611441484682,
"count": 692
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -14.177200078964233,
"min": -66.06939989328384,
"max": 63.40679979324341,
"count": 692
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 692
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 692
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.019116126811907937,
"min": 0.009264162741116404,
"max": 0.023554403986781835,
"count": 335
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.019116126811907937,
"min": 0.009264162741116404,
"max": 0.023554403986781835,
"count": 335
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.10319636215766272,
"min": 0.07646674041946729,
"max": 0.1312625450392564,
"count": 335
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.10319636215766272,
"min": 0.07646674041946729,
"max": 0.1312625450392564,
"count": 335
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.10433849568168323,
"min": 0.07763358304897944,
"max": 0.13379605958859125,
"count": 335
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.10433849568168323,
"min": 0.07763358304897944,
"max": 0.13379605958859125,
"count": 335
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 335
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 335
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 335
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 335
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 335
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 335
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1685467254",
"python_version": "3.9.16 (main, Mar 8 2023, 04:29:24) \n[Clang 14.0.6 ]",
"command_line_arguments": "/Users/cmpatino/Software/miniconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics --resume",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0",
"numpy_version": "1.21.2",
"end_time_seconds": "1685492584"
},
"total": 24436.345536667002,
"count": 1,
"self": 0.29455254100321326,
"children": {
"run_training.setup": {
"total": 0.013618209000000103,
"count": 1,
"self": 0.013618209000000103
},
"TrainerController.start_learning": {
"total": 24436.037365917,
"count": 1,
"self": 5.864208124992729,
"children": {
"TrainerController._reset_env": {
"total": 2.8453661679947158,
"count": 36,
"self": 2.8453661679947158
},
"TrainerController.advance": {
"total": 24427.21861583201,
"count": 483364,
"self": 5.138009383848839,
"children": {
"env_step": {
"total": 18952.890719426316,
"count": 483364,
"self": 18194.754368307887,
"children": {
"SubprocessEnvManager._take_step": {
"total": 754.6493575943163,
"count": 483364,
"self": 20.78563564614217,
"children": {
"TorchPolicy.evaluate": {
"total": 733.8637219481741,
"count": 867500,
"self": 733.8637219481741
}
}
},
"workers": {
"total": 3.4869935241127905,
"count": 483363,
"self": 0.0,
"children": {
"worker_root": {
"total": 24425.90537473736,
"count": 483363,
"is_parallel": true,
"self": 6859.6228179850405,
"children": {
"steps_from_proto": {
"total": 0.054453288007503264,
"count": 72,
"is_parallel": true,
"self": 0.00580904101556956,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.048644246991933704,
"count": 288,
"is_parallel": true,
"self": 0.048644246991933704
}
}
},
"UnityEnvironment.step": {
"total": 17566.22810346431,
"count": 483363,
"is_parallel": true,
"self": 50.10850966785438,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 318.4643417006471,
"count": 483363,
"is_parallel": true,
"self": 318.4643417006471
},
"communicator.exchange": {
"total": 16486.666769027204,
"count": 483363,
"is_parallel": true,
"self": 16486.666769027204
},
"steps_from_proto": {
"total": 710.988483068601,
"count": 966726,
"is_parallel": true,
"self": 75.7151926292131,
"children": {
"_process_rank_one_or_two_observation": {
"total": 635.2732904393879,
"count": 3866904,
"is_parallel": true,
"self": 635.2732904393879
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 5469.1898870218465,
"count": 483363,
"self": 38.61935983038529,
"children": {
"process_trajectory": {
"total": 1201.0461626584283,
"count": 483363,
"self": 1199.561214450426,
"children": {
"RLTrainer._checkpoint": {
"total": 1.484948208002379,
"count": 14,
"self": 1.484948208002379
}
}
},
"_update_policy": {
"total": 4229.524364533033,
"count": 336,
"self": 530.0860378200996,
"children": {
"TorchPOCAOptimizer.update": {
"total": 3699.4383267129333,
"count": 10080,
"self": 3699.4383267129333
}
}
}
}
}
}
},
"trainer_threads": {
"total": 4.169996827840805e-07,
"count": 1,
"self": 4.169996827840805e-07
},
"TrainerController._save_models": {
"total": 0.1091753750006319,
"count": 1,
"self": 0.0029110840005159844,
"children": {
"RLTrainer._checkpoint": {
"total": 0.10626429100011592,
"count": 1,
"self": 0.10626429100011592
}
}
}
}
}
}
}