Upload folder using huggingface_hub
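"Upload folder using huggingface_hub" is the default commit message written by the `upload_folder` helper in the `huggingface_hub` Python library. Below is a minimal sketch of how a commit like this one is typically produced; the repo id and folder path are taken from the sf_log.txt entries in this diff, and the call itself is an assumption, not the exact command behind this commit:

```python
# Hedged sketch: upload_folder() commits every file under a local directory
# to a Hub repo; "Upload folder using huggingface_hub" is its default
# commit message. Requires a write token (e.g. via huggingface-cli login).
from huggingface_hub import upload_folder

upload_folder(
    repo_id="alidenewade/rl_course_vizdoom_health_gathering_supreme",         # from sf_log.txt below
    folder_path="/root/hfRL/ml/LunarLander-v2/train_dir/default_experiment",  # from sf_log.txt below
    commit_message="Upload folder using huggingface_hub",                     # the default, shown explicitly
)
```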
.summary/0/events.out.tfevents.1730987699.ali
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87059ecbdc1670d0818d006cee4eb4c1c8f0c91d0d12d9116f813c80be81e325
+size 375709
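The three-line blob above (and the .pth checkpoints below) are Git LFS pointer files rather than the binaries themselves: `oid` is the sha256 of the actual payload and `size` its length in bytes. A small sketch of verifying a downloaded file against its pointer; the local filename is an assumption:

```python
# Sketch: recompute the sha256 and size recorded in the LFS pointer above.
import hashlib
import os

path = "events.out.tfevents.1730987699.ali"  # assumed local download path
with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()
assert digest == "87059ecbdc1670d0818d006cee4eb4c1c8f0c91d0d12d9116f813c80be81e325"
assert os.path.getsize(path) == 375709
```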
README.md
CHANGED
@@ -15,7 +15,7 @@ model-index:
       type: doom_health_gathering_supreme
     metrics:
     - type: mean_reward
-      value: 3.
+      value: 3.83 +/- 0.50
       name: mean_reward
       verified: false
 ---
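The updated metric follows the mean ± standard deviation convention used for `mean_reward` on the Hub. An illustration of how such a figure is formed from per-episode rewards; the values below are placeholders, not data from this run:

```python
# Illustration only: "3.83 +/- 0.50" style metrics are a mean and standard
# deviation over evaluation episodes. The list below is made up.
import statistics

episode_rewards = [3.2, 4.1, 3.8, 4.4, 3.7]  # placeholder values
mean = statistics.mean(episode_rewards)
std = statistics.pstdev(episode_rewards)
print(f"{mean:.2f} +/- {std:.2f}")
```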
checkpoint_p0/best_000004378_17932288_reward_4.918.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecda5f7116bb9e8b82a8f5a81375c46de9759c64337027aa765684d78f80e243
+size 34929243
checkpoint_p0/checkpoint_000004691_19214336.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b197e2a9b541c78f571b125dc9e75b3d5088da50bc45baf2aa8ecffbed78f48b
+size 34929669
checkpoint_p0/checkpoint_000004884_20004864.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddd5a144c2fb8ea19a7a0d46d6c5d64863e32ea3dc3a02d29d19a7fd0afcc009
+size 34929669
config.json
CHANGED
@@ -15,7 +15,7 @@
     "worker_num_splits": 2,
     "policy_workers_per_policy": 1,
     "max_policy_lag": 1000,
-    "num_workers":
+    "num_workers": 8,
     "num_envs_per_worker": 4,
     "batch_size": 1024,
     "num_batches_per_epoch": 1,
@@ -65,7 +65,7 @@
     "summaries_use_frameskip": true,
     "heartbeat_interval": 20,
     "heartbeat_reporting_interval": 600,
-    "train_for_env_steps":
+    "train_for_env_steps": 20000000,
     "train_for_seconds": 10000000000,
     "save_every_sec": 120,
     "keep_checkpoints": 2,
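Sample Factory config keys map one-to-one onto its CLI flags, so the values filled in by this commit correspond to a launch roughly like the sketch below. The module path follows the Hugging Face Deep RL course convention and is an assumption, not taken from this commit:

```python
# Hedged sketch of the training invocation implied by config.json.
import subprocess

subprocess.run([
    "python", "-m", "sf_examples.vizdoom.train_vizdoom",
    "--env=doom_health_gathering_supreme",  # matches the model-index in README.md
    "--num_workers=8",                      # config.json: "num_workers": 8
    "--num_envs_per_worker=4",              # config.json: "num_envs_per_worker": 4
    "--train_for_env_steps=20000000",       # config.json: "train_for_env_steps": 20000000
], check=True)
```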
replay.mp4
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:90598894c4dea90f4e243e09fa039d69af671c31d0bd0e712e25ff87a80bafba
+size 5731726
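replay.mp4 and the "model has been pushed" entry in sf_log.txt below are what Sample Factory's enjoy script produces after training. A hedged sketch of that step; the flags follow the Deep RL course examples and are assumptions, not recovered from this commit:

```python
# Hedged sketch: evaluate the trained policy, record replay.mp4, and push
# the experiment folder to the Hub.
import subprocess

subprocess.run([
    "python", "-m", "sf_examples.vizdoom.enjoy_vizdoom",
    "--env=doom_health_gathering_supreme",
    "--max_num_episodes=10",   # assumed evaluation length
    "--save_video",
    "--push_to_hub",
    "--hf_repository=alidenewade/rl_course_vizdoom_health_gathering_supreme",
], check=True)
```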
sf_log.txt
CHANGED
@@ -8824,3 +8824,790 @@ main_loop: 43.0008
 [2024-11-07 15:41:09,303][04584] Avg episode rewards: #0: 4.008, true rewards: #0: 3.808
 [2024-11-07 15:41:09,305][04584] Avg episode reward: 4.008, avg true_objective: 3.808
 [2024-11-07 15:41:24,061][04584] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
+[2024-11-07 15:41:33,571][04584] The model has been pushed to https://huggingface.co/alidenewade/rl_course_vizdoom_health_gathering_supreme
+[2024-11-07 15:55:04,579][14395] Saving configuration to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json...
+[2024-11-07 15:55:04,580][14395] Rollout worker 0 uses device cpu
+[2024-11-07 15:55:04,583][14395] Rollout worker 1 uses device cpu
+[2024-11-07 15:55:04,586][14395] Rollout worker 2 uses device cpu
+[2024-11-07 15:55:04,590][14395] Rollout worker 3 uses device cpu
+[2024-11-07 15:55:04,592][14395] Rollout worker 4 uses device cpu
+[2024-11-07 15:55:04,594][14395] Rollout worker 5 uses device cpu
+[2024-11-07 15:55:04,597][14395] Rollout worker 6 uses device cpu
+[2024-11-07 15:55:04,599][14395] Rollout worker 7 uses device cpu
+[2024-11-07 15:55:04,705][14395] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-11-07 15:55:04,709][14395] InferenceWorker_p0-w0: min num requests: 2
+[2024-11-07 15:55:04,753][14395] Starting all processes...
+[2024-11-07 15:55:04,756][14395] Starting process learner_proc0
+[2024-11-07 15:55:04,934][14395] Starting all processes...
+[2024-11-07 15:55:05,018][14395] Starting process inference_proc0-0
+[2024-11-07 15:55:05,019][14395] Starting process rollout_proc0
+[2024-11-07 15:55:05,020][14395] Starting process rollout_proc1
+[2024-11-07 15:55:05,020][14395] Starting process rollout_proc2
+[2024-11-07 15:55:05,021][14395] Starting process rollout_proc3
+[2024-11-07 15:55:05,021][14395] Starting process rollout_proc4
+[2024-11-07 15:55:05,022][14395] Starting process rollout_proc5
+[2024-11-07 15:55:05,022][14395] Starting process rollout_proc6
+[2024-11-07 15:55:05,023][14395] Starting process rollout_proc7
+[2024-11-07 15:55:11,298][14445] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-11-07 15:55:11,313][14445] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+[2024-11-07 15:55:11,489][14477] Worker 4 uses CPU cores [4]
+[2024-11-07 15:55:11,561][14466] Worker 2 uses CPU cores [2]
+[2024-11-07 15:55:12,164][14445] Num visible devices: 1
+[2024-11-07 15:55:12,223][14445] Starting seed is not provided
+[2024-11-07 15:55:12,223][14445] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-11-07 15:55:12,223][14445] Initializing actor-critic model on device cuda:0
+[2024-11-07 15:55:12,224][14445] RunningMeanStd input shape: (3, 72, 128)
+[2024-11-07 15:55:12,226][14445] RunningMeanStd input shape: (1,)
+[2024-11-07 15:55:12,331][14445] ConvEncoder: input_channels=3
+[2024-11-07 15:55:12,727][14468] Worker 3 uses CPU cores [3]
+[2024-11-07 15:55:12,770][14445] Conv encoder output size: 512
+[2024-11-07 15:55:12,771][14445] Policy head output size: 512
+[2024-11-07 15:55:12,806][14445] Created Actor Critic model with architecture:
+[2024-11-07 15:55:12,807][14445] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): GRU(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+  )
+)
+[2024-11-07 15:55:13,345][14469] Worker 7 uses CPU cores [0, 1, 2, 3, 4, 5, 6]
+[2024-11-07 15:55:13,552][14467] Worker 1 uses CPU cores [1]
+[2024-11-07 15:55:13,584][14462] Worker 0 uses CPU cores [0]
+[2024-11-07 15:55:13,653][14461] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-11-07 15:55:13,654][14461] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+[2024-11-07 15:55:13,687][14461] Num visible devices: 1
+[2024-11-07 15:55:14,018][14478] Worker 5 uses CPU cores [5]
+[2024-11-07 15:55:14,479][14470] Worker 6 uses CPU cores [6]
+[2024-11-07 15:55:14,948][14445] Using optimizer <class 'torch.optim.adam.Adam'>
+[2024-11-07 15:55:16,722][14445] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003910_16015360.pth...
+[2024-11-07 15:55:16,819][14445] Loading model from checkpoint
+[2024-11-07 15:55:16,822][14445] Loaded experiment state at self.train_step=3910, self.env_steps=16015360
+[2024-11-07 15:55:16,822][14445] Initialized policy 0 weights for model version 3910
+[2024-11-07 15:55:16,834][14445] LearnerWorker_p0 finished initialization!
+[2024-11-07 15:55:16,834][14445] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-11-07 15:55:17,090][14461] RunningMeanStd input shape: (3, 72, 128)
+[2024-11-07 15:55:17,092][14461] RunningMeanStd input shape: (1,)
+[2024-11-07 15:55:17,104][14461] ConvEncoder: input_channels=3
+[2024-11-07 15:55:17,233][14461] Conv encoder output size: 512
+[2024-11-07 15:55:17,234][14461] Policy head output size: 512
+[2024-11-07 15:55:17,302][14395] Inference worker 0-0 is ready!
+[2024-11-07 15:55:17,303][14395] All inference workers are ready! Signal rollout workers to start!
+[2024-11-07 15:55:17,409][14477] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:55:17,415][14468] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:55:17,439][14466] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:55:17,467][14467] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:55:17,478][14462] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:55:17,512][14478] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:55:17,571][14470] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:55:17,576][14469] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-07 15:55:18,028][14468] Decorrelating experience for 0 frames...
+[2024-11-07 15:55:18,032][14477] Decorrelating experience for 0 frames...
+[2024-11-07 15:55:18,032][14466] Decorrelating experience for 0 frames...
+[2024-11-07 15:55:18,261][14467] Decorrelating experience for 0 frames...
+[2024-11-07 15:55:18,314][14462] Decorrelating experience for 0 frames...
+[2024-11-07 15:55:18,449][14469] Decorrelating experience for 0 frames...
+[2024-11-07 15:55:18,583][14477] Decorrelating experience for 32 frames...
+[2024-11-07 15:55:18,825][14468] Decorrelating experience for 32 frames...
+[2024-11-07 15:55:18,828][14467] Decorrelating experience for 32 frames...
+[2024-11-07 15:55:18,837][14462] Decorrelating experience for 32 frames...
+[2024-11-07 15:55:18,977][14478] Decorrelating experience for 0 frames...
+[2024-11-07 15:55:19,335][14477] Decorrelating experience for 64 frames...
+[2024-11-07 15:55:19,454][14478] Decorrelating experience for 32 frames...
+[2024-11-07 15:55:19,456][14466] Decorrelating experience for 32 frames...
+[2024-11-07 15:55:19,552][14462] Decorrelating experience for 64 frames...
+[2024-11-07 15:55:19,814][14477] Decorrelating experience for 96 frames...
+[2024-11-07 15:55:19,889][14470] Decorrelating experience for 0 frames...
+[2024-11-07 15:55:19,910][14467] Decorrelating experience for 64 frames...
+[2024-11-07 15:55:19,927][14395] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 16015360. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-11-07 15:55:20,253][14462] Decorrelating experience for 96 frames...
+[2024-11-07 15:55:20,461][14478] Decorrelating experience for 64 frames...
+[2024-11-07 15:55:20,464][14466] Decorrelating experience for 64 frames...
+[2024-11-07 15:55:20,466][14469] Decorrelating experience for 32 frames...
+[2024-11-07 15:55:20,646][14470] Decorrelating experience for 32 frames...
+[2024-11-07 15:55:20,674][14467] Decorrelating experience for 96 frames...
+[2024-11-07 15:55:21,007][14468] Decorrelating experience for 64 frames...
+[2024-11-07 15:55:21,032][14466] Decorrelating experience for 96 frames...
+[2024-11-07 15:55:21,394][14478] Decorrelating experience for 96 frames...
+[2024-11-07 15:55:21,694][14468] Decorrelating experience for 96 frames...
+[2024-11-07 15:55:21,701][14469] Decorrelating experience for 64 frames...
+[2024-11-07 15:55:21,913][14470] Decorrelating experience for 64 frames...
+[2024-11-07 15:55:22,539][14469] Decorrelating experience for 96 frames...
+[2024-11-07 15:55:23,773][14445] Signal inference workers to stop experience collection...
+[2024-11-07 15:55:23,794][14461] InferenceWorker_p0-w0: stopping experience collection
+[2024-11-07 15:55:23,856][14470] Decorrelating experience for 96 frames...
+[2024-11-07 15:55:25,856][14395] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 16015360. Throughput: 0: 338.7. Samples: 2008. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-11-07 15:55:25,860][14395] Avg episode reward: [(0, '2.242')]
+[2024-11-07 15:55:25,865][14395] Heartbeat connected on RolloutWorker_w1
+[2024-11-07 15:55:25,866][14395] Heartbeat connected on InferenceWorker_p0-w0
+[2024-11-07 15:55:25,869][14395] Heartbeat connected on RolloutWorker_w6
+[2024-11-07 15:55:25,870][14395] Heartbeat connected on RolloutWorker_w7
+[2024-11-07 15:55:25,872][14395] Heartbeat connected on RolloutWorker_w2
+[2024-11-07 15:55:25,873][14395] Heartbeat connected on RolloutWorker_w0
+[2024-11-07 15:55:25,876][14395] Heartbeat connected on Batcher_0
+[2024-11-07 15:55:25,877][14395] Heartbeat connected on RolloutWorker_w5
+[2024-11-07 15:55:25,883][14395] Heartbeat connected on RolloutWorker_w3
+[2024-11-07 15:55:25,889][14395] Heartbeat connected on RolloutWorker_w4
+[2024-11-07 15:55:29,927][14395] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 16015360. Throughput: 0: 247.8. Samples: 2478. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-11-07 15:55:29,929][14395] Avg episode reward: [(0, '2.242')]
+[2024-11-07 15:55:30,500][14445] Signal inference workers to resume experience collection...
+[2024-11-07 15:55:30,501][14461] InferenceWorker_p0-w0: resuming experience collection
+[2024-11-07 15:55:31,048][14395] Heartbeat connected on LearnerWorker_p0
+[2024-11-07 15:55:34,927][14395] Fps is (10 sec: 3160.8, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 16044032. Throughput: 0: 313.5. Samples: 4702. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:55:34,933][14395] Avg episode reward: [(0, '3.523')]
+[2024-11-07 15:55:36,987][14461] Updated weights for policy 0, policy_version 3920 (0.0055)
+[2024-11-07 15:55:39,928][14395] Fps is (10 sec: 5734.1, 60 sec: 2867.2, 300 sec: 2867.2). Total num frames: 16072704. Throughput: 0: 644.5. Samples: 12890. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 15:55:39,934][14395] Avg episode reward: [(0, '4.344')]
+[2024-11-07 15:55:44,929][14395] Fps is (10 sec: 4914.8, 60 sec: 3112.9, 300 sec: 3112.9). Total num frames: 16093184. Throughput: 0: 794.9. Samples: 19872. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 15:55:44,931][14395] Avg episode reward: [(0, '4.448')]
+[2024-11-07 15:55:45,034][14461] Updated weights for policy 0, policy_version 3930 (0.0028)
+[2024-11-07 15:55:49,927][14395] Fps is (10 sec: 4915.4, 60 sec: 3549.9, 300 sec: 3549.9). Total num frames: 16121856. Throughput: 0: 820.3. Samples: 24610. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 15:55:49,929][14395] Avg episode reward: [(0, '4.447')]
+[2024-11-07 15:55:52,269][14461] Updated weights for policy 0, policy_version 3940 (0.0045)
+[2024-11-07 15:55:54,927][14395] Fps is (10 sec: 6554.4, 60 sec: 4096.0, 300 sec: 4096.0). Total num frames: 16158720. Throughput: 0: 957.8. Samples: 33524. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:55:54,930][14395] Avg episode reward: [(0, '4.553')]
+[2024-11-07 15:55:58,004][14461] Updated weights for policy 0, policy_version 3950 (0.0031)
+[2024-11-07 15:55:59,928][14395] Fps is (10 sec: 5734.2, 60 sec: 4096.0, 300 sec: 4096.0). Total num frames: 16179200. Throughput: 0: 1050.7. Samples: 42028. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:55:59,934][14395] Avg episode reward: [(0, '4.512')]
+[2024-11-07 15:56:04,928][14395] Fps is (10 sec: 4915.1, 60 sec: 4278.1, 300 sec: 4278.1). Total num frames: 16207872. Throughput: 0: 1004.8. Samples: 45216. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 15:56:04,930][14395] Avg episode reward: [(0, '4.356')]
+[2024-11-07 15:56:06,269][14461] Updated weights for policy 0, policy_version 3960 (0.0029)
+[2024-11-07 15:56:09,927][14395] Fps is (10 sec: 6963.4, 60 sec: 4669.5, 300 sec: 4669.5). Total num frames: 16248832. Throughput: 0: 1221.1. Samples: 55822. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 15:56:09,929][14395] Avg episode reward: [(0, '4.361')]
+[2024-11-07 15:56:11,796][14461] Updated weights for policy 0, policy_version 3970 (0.0027)
+[2024-11-07 15:56:14,936][14395] Fps is (10 sec: 6548.2, 60 sec: 4691.1, 300 sec: 4691.1). Total num frames: 16273408. Throughput: 0: 1383.7. Samples: 64754. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 15:56:14,939][14395] Avg episode reward: [(0, '4.444')]
+[2024-11-07 15:56:19,928][14395] Fps is (10 sec: 4095.7, 60 sec: 4573.8, 300 sec: 4573.8). Total num frames: 16289792. Throughput: 0: 1394.7. Samples: 67462. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:56:19,931][14395] Avg episode reward: [(0, '4.359')]
+[2024-11-07 15:56:21,825][14461] Updated weights for policy 0, policy_version 3980 (0.0043)
+[2024-11-07 15:56:24,927][14395] Fps is (10 sec: 4509.4, 60 sec: 5131.2, 300 sec: 4663.2). Total num frames: 16318464. Throughput: 0: 1371.5. Samples: 74606. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:56:24,932][14395] Avg episode reward: [(0, '4.307')]
+[2024-11-07 15:56:27,554][14461] Updated weights for policy 0, policy_version 3990 (0.0023)
+[2024-11-07 15:56:29,928][14395] Fps is (10 sec: 6553.7, 60 sec: 5666.1, 300 sec: 4856.7). Total num frames: 16355328. Throughput: 0: 1448.9. Samples: 85070. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:56:29,931][14395] Avg episode reward: [(0, '4.548')]
+[2024-11-07 15:56:34,927][14395] Fps is (10 sec: 5734.3, 60 sec: 5529.6, 300 sec: 4806.0). Total num frames: 16375808. Throughput: 0: 1439.1. Samples: 89368. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-11-07 15:56:34,929][14395] Avg episode reward: [(0, '4.400')]
+[2024-11-07 15:56:35,600][14461] Updated weights for policy 0, policy_version 4000 (0.0043)
+[2024-11-07 15:56:39,927][14395] Fps is (10 sec: 6144.3, 60 sec: 5734.4, 300 sec: 5017.6). Total num frames: 16416768. Throughput: 0: 1447.8. Samples: 98676. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2024-11-07 15:56:39,929][14395] Avg episode reward: [(0, '4.556')]
+[2024-11-07 15:56:40,909][14461] Updated weights for policy 0, policy_version 4010 (0.0028)
+[2024-11-07 15:56:44,927][14395] Fps is (10 sec: 6553.6, 60 sec: 5802.8, 300 sec: 5011.6). Total num frames: 16441344. Throughput: 0: 1453.7. Samples: 107446. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 15:56:44,940][14395] Avg episode reward: [(0, '4.731')]
+[2024-11-07 15:56:49,927][14395] Fps is (10 sec: 4505.5, 60 sec: 5666.1, 300 sec: 4960.7). Total num frames: 16461824. Throughput: 0: 1449.9. Samples: 110462. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:56:49,930][14395] Avg episode reward: [(0, '4.555')]
+[2024-11-07 15:56:50,788][14461] Updated weights for policy 0, policy_version 4020 (0.0026)
+[2024-11-07 15:56:54,928][14395] Fps is (10 sec: 4095.9, 60 sec: 5393.0, 300 sec: 4915.2). Total num frames: 16482304. Throughput: 0: 1338.9. Samples: 116072. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:56:54,932][14395] Avg episode reward: [(0, '4.357')]
+[2024-11-07 15:56:59,928][14395] Fps is (10 sec: 4095.9, 60 sec: 5393.1, 300 sec: 4874.2). Total num frames: 16502784. Throughput: 0: 1269.0. Samples: 121850. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:56:59,932][14395] Avg episode reward: [(0, '4.413')]
+[2024-11-07 15:56:59,977][14445] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004029_16502784.pth...
+[2024-11-07 15:57:00,368][14445] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003908_16007168.pth
+[2024-11-07 15:57:00,908][14461] Updated weights for policy 0, policy_version 4030 (0.0067)
+[2024-11-07 15:57:04,928][14395] Fps is (10 sec: 4505.4, 60 sec: 5324.7, 300 sec: 4876.2). Total num frames: 16527360. Throughput: 0: 1281.3. Samples: 125120. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 15:57:04,931][14395] Avg episode reward: [(0, '4.501')]
+[2024-11-07 15:57:09,928][14395] Fps is (10 sec: 4095.6, 60 sec: 4915.1, 300 sec: 4803.4). Total num frames: 16543744. Throughput: 0: 1247.7. Samples: 130756. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:57:09,931][14395] Avg episode reward: [(0, '4.539')]
+[2024-11-07 15:57:10,937][14461] Updated weights for policy 0, policy_version 4040 (0.0040)
+[2024-11-07 15:57:14,927][14395] Fps is (10 sec: 4096.3, 60 sec: 4915.9, 300 sec: 4808.4). Total num frames: 16568320. Throughput: 0: 1192.9. Samples: 138748. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:57:14,930][14395] Avg episode reward: [(0, '4.564')]
+[2024-11-07 15:57:19,301][14461] Updated weights for policy 0, policy_version 4050 (0.0059)
+[2024-11-07 15:57:19,935][14395] Fps is (10 sec: 4502.5, 60 sec: 4982.8, 300 sec: 4778.3). Total num frames: 16588800. Throughput: 0: 1185.9. Samples: 142742. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 15:57:19,938][14395] Avg episode reward: [(0, '4.485')]
+[2024-11-07 15:57:24,940][14395] Fps is (10 sec: 4499.7, 60 sec: 4914.1, 300 sec: 4783.6). Total num frames: 16613376. Throughput: 0: 1116.7. Samples: 148942. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:57:24,942][14395] Avg episode reward: [(0, '4.436')]
+[2024-11-07 15:57:27,985][14461] Updated weights for policy 0, policy_version 4060 (0.0038)
+[2024-11-07 15:57:29,930][14395] Fps is (10 sec: 4917.8, 60 sec: 4710.2, 300 sec: 4789.1). Total num frames: 16637952. Throughput: 0: 1088.2. Samples: 156418. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 15:57:29,933][14395] Avg episode reward: [(0, '4.518')]
+[2024-11-07 15:57:34,927][14395] Fps is (10 sec: 5331.8, 60 sec: 4846.9, 300 sec: 4824.2). Total num frames: 16666624. Throughput: 0: 1114.8. Samples: 160630. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:57:34,936][14395] Avg episode reward: [(0, '4.508')]
+[2024-11-07 15:57:35,266][14461] Updated weights for policy 0, policy_version 4070 (0.0031)
+[2024-11-07 15:57:39,927][14395] Fps is (10 sec: 4916.6, 60 sec: 4505.6, 300 sec: 4798.2). Total num frames: 16687104. Throughput: 0: 1131.8. Samples: 167002. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:57:39,931][14395] Avg episode reward: [(0, '4.463')]
+[2024-11-07 15:57:44,927][14395] Fps is (10 sec: 3276.8, 60 sec: 4300.8, 300 sec: 4717.5). Total num frames: 16699392. Throughput: 0: 1109.5. Samples: 171778. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 15:57:44,931][14395] Avg episode reward: [(0, '4.400')]
+[2024-11-07 15:57:46,388][14461] Updated weights for policy 0, policy_version 4080 (0.0058)
+[2024-11-07 15:57:49,927][14395] Fps is (10 sec: 4915.3, 60 sec: 4573.9, 300 sec: 4806.0). Total num frames: 16736256. Throughput: 0: 1139.0. Samples: 176372. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:57:49,929][14395] Avg episode reward: [(0, '4.383')]
+[2024-11-07 15:57:51,689][14461] Updated weights for policy 0, policy_version 4090 (0.0024)
+[2024-11-07 15:57:54,927][14395] Fps is (10 sec: 7782.4, 60 sec: 4915.2, 300 sec: 4915.2). Total num frames: 16777216. Throughput: 0: 1275.9. Samples: 188170. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 15:57:54,929][14395] Avg episode reward: [(0, '4.469')]
+[2024-11-07 15:57:57,147][14461] Updated weights for policy 0, policy_version 4100 (0.0032)
+[2024-11-07 15:57:59,927][14395] Fps is (10 sec: 7372.9, 60 sec: 5120.0, 300 sec: 4966.4). Total num frames: 16809984. Throughput: 0: 1340.7. Samples: 199080. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:57:59,929][14395] Avg episode reward: [(0, '4.689')]
+[2024-11-07 15:58:04,528][14461] Updated weights for policy 0, policy_version 4110 (0.0048)
+[2024-11-07 15:58:04,927][14395] Fps is (10 sec: 5734.4, 60 sec: 5120.1, 300 sec: 4964.9). Total num frames: 16834560. Throughput: 0: 1332.6. Samples: 202700. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:58:04,929][14395] Avg episode reward: [(0, '4.492')]
+[2024-11-07 15:58:09,927][14395] Fps is (10 sec: 6144.0, 60 sec: 5461.5, 300 sec: 5035.7). Total num frames: 16871424. Throughput: 0: 1410.6. Samples: 212400. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 15:58:09,929][14395] Avg episode reward: [(0, '4.335')]
+[2024-11-07 15:58:10,164][14461] Updated weights for policy 0, policy_version 4120 (0.0027)
+[2024-11-07 15:58:16,757][14395] Fps is (10 sec: 5193.6, 60 sec: 5299.7, 300 sec: 4980.2). Total num frames: 16896000. Throughput: 0: 1304.1. Samples: 217486. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 15:58:16,759][14395] Avg episode reward: [(0, '4.390')]
+[2024-11-07 15:58:19,359][14461] Updated weights for policy 0, policy_version 4130 (0.0031)
+[2024-11-07 15:58:19,927][14395] Fps is (10 sec: 4505.6, 60 sec: 5462.1, 300 sec: 5006.2). Total num frames: 16916480. Throughput: 0: 1369.6. Samples: 222262. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-11-07 15:58:19,930][14395] Avg episode reward: [(0, '4.352')]
+[2024-11-07 15:58:24,928][14395] Fps is (10 sec: 6517.1, 60 sec: 5599.1, 300 sec: 5048.0). Total num frames: 16949248. Throughput: 0: 1459.8. Samples: 232694. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 15:58:24,930][14395] Avg episode reward: [(0, '4.351')]
+[2024-11-07 15:58:25,722][14461] Updated weights for policy 0, policy_version 4140 (0.0038)
+[2024-11-07 15:58:29,927][14395] Fps is (10 sec: 6553.6, 60 sec: 5734.7, 300 sec: 5087.7). Total num frames: 16982016. Throughput: 0: 1550.2. Samples: 241538. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 15:58:29,929][14395] Avg episode reward: [(0, '4.556')]
+[2024-11-07 15:58:31,857][14461] Updated weights for policy 0, policy_version 4150 (0.0032)
+[2024-11-07 15:58:34,927][14395] Fps is (10 sec: 7373.2, 60 sec: 5939.2, 300 sec: 5167.3). Total num frames: 17022976. Throughput: 0: 1578.4. Samples: 247402. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:58:34,929][14395] Avg episode reward: [(0, '4.514')]
+[2024-11-07 15:58:37,073][14461] Updated weights for policy 0, policy_version 4160 (0.0021)
+[2024-11-07 15:58:39,927][14395] Fps is (10 sec: 7782.4, 60 sec: 6212.3, 300 sec: 5222.4). Total num frames: 17059840. Throughput: 0: 1581.1. Samples: 259320. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:58:39,928][14395] Avg episode reward: [(0, '4.365')]
+[2024-11-07 15:58:42,649][14461] Updated weights for policy 0, policy_version 4170 (0.0027)
+[2024-11-07 15:58:44,927][14395] Fps is (10 sec: 6963.1, 60 sec: 6553.6, 300 sec: 5254.9). Total num frames: 17092608. Throughput: 0: 1568.2. Samples: 269650. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:58:44,929][14395] Avg episode reward: [(0, '4.379')]
+[2024-11-07 15:58:48,952][14461] Updated weights for policy 0, policy_version 4180 (0.0025)
+[2024-11-07 15:58:51,218][14395] Fps is (10 sec: 5441.7, 60 sec: 6282.0, 300 sec: 5234.1). Total num frames: 17121280. Throughput: 0: 1553.4. Samples: 274606. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 15:58:51,220][14395] Avg episode reward: [(0, '4.417')]
+[2024-11-07 15:58:54,927][14395] Fps is (10 sec: 5324.6, 60 sec: 6144.0, 300 sec: 5258.1). Total num frames: 17145856. Throughput: 0: 1511.7. Samples: 280426. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:58:54,930][14395] Avg episode reward: [(0, '4.625')]
+[2024-11-07 15:58:57,073][14461] Updated weights for policy 0, policy_version 4190 (0.0026)
+[2024-11-07 15:58:59,927][14395] Fps is (10 sec: 7054.4, 60 sec: 6212.3, 300 sec: 5306.2). Total num frames: 17182720. Throughput: 0: 1721.4. Samples: 291798. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:58:59,929][14395] Avg episode reward: [(0, '4.445')]
+[2024-11-07 15:58:59,947][14445] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004195_17182720.pth...
+[2024-11-07 15:59:00,087][14445] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003910_16015360.pth
+[2024-11-07 15:59:02,802][14461] Updated weights for policy 0, policy_version 4200 (0.0023)
+[2024-11-07 15:59:04,928][14395] Fps is (10 sec: 7372.4, 60 sec: 6417.0, 300 sec: 5352.1). Total num frames: 17219584. Throughput: 0: 1662.1. Samples: 297056. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:59:04,931][14395] Avg episode reward: [(0, '4.652')]
+[2024-11-07 15:59:08,029][14461] Updated weights for policy 0, policy_version 4210 (0.0024)
+[2024-11-07 15:59:09,930][14395] Fps is (10 sec: 7370.7, 60 sec: 6416.8, 300 sec: 5396.0). Total num frames: 17256448. Throughput: 0: 1687.7. Samples: 308644. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 15:59:09,933][14395] Avg episode reward: [(0, '4.373')]
+[2024-11-07 15:59:14,459][14461] Updated weights for policy 0, policy_version 4220 (0.0045)
+[2024-11-07 15:59:14,927][14395] Fps is (10 sec: 6554.2, 60 sec: 6689.4, 300 sec: 5403.2). Total num frames: 17285120. Throughput: 0: 1698.0. Samples: 317946. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 15:59:14,932][14395] Avg episode reward: [(0, '4.586')]
+[2024-11-07 15:59:19,928][14395] Fps is (10 sec: 6145.4, 60 sec: 6690.1, 300 sec: 5427.2). Total num frames: 17317888. Throughput: 0: 1677.6. Samples: 322894. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 15:59:19,931][14395] Avg episode reward: [(0, '4.264')]
+[2024-11-07 15:59:20,972][14461] Updated weights for policy 0, policy_version 4230 (0.0044)
+[2024-11-07 15:59:25,496][14395] Fps is (10 sec: 4650.4, 60 sec: 6356.8, 300 sec: 5370.8). Total num frames: 17334272. Throughput: 0: 1578.1. Samples: 331234. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:59:25,502][14395] Avg episode reward: [(0, '4.232')]
+[2024-11-07 15:59:29,927][14395] Fps is (10 sec: 3686.5, 60 sec: 6212.2, 300 sec: 5357.6). Total num frames: 17354752. Throughput: 0: 1457.4. Samples: 335232. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:59:29,929][14395] Avg episode reward: [(0, '4.326')]
+[2024-11-07 15:59:31,611][14461] Updated weights for policy 0, policy_version 4240 (0.0051)
+[2024-11-07 15:59:34,927][14395] Fps is (10 sec: 5646.1, 60 sec: 6075.7, 300 sec: 5381.0). Total num frames: 17387520. Throughput: 0: 1491.8. Samples: 339814. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 15:59:34,929][14395] Avg episode reward: [(0, '4.443')]
+[2024-11-07 15:59:37,263][14461] Updated weights for policy 0, policy_version 4250 (0.0036)
+[2024-11-07 15:59:39,927][14395] Fps is (10 sec: 7373.0, 60 sec: 6144.0, 300 sec: 5435.1). Total num frames: 17428480. Throughput: 0: 1568.5. Samples: 351010. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 15:59:39,932][14395] Avg episode reward: [(0, '4.296')]
+[2024-11-07 15:59:42,885][14461] Updated weights for policy 0, policy_version 4260 (0.0025)
+[2024-11-07 15:59:44,927][14395] Fps is (10 sec: 7373.0, 60 sec: 6144.0, 300 sec: 5456.2). Total num frames: 17461248. Throughput: 0: 1548.5. Samples: 361482. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:59:44,930][14395] Avg episode reward: [(0, '4.273')]
+[2024-11-07 15:59:48,105][14461] Updated weights for policy 0, policy_version 4270 (0.0021)
+[2024-11-07 15:59:49,927][14395] Fps is (10 sec: 7372.7, 60 sec: 6488.3, 300 sec: 5506.8). Total num frames: 17502208. Throughput: 0: 1569.5. Samples: 367682. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:59:49,929][14395] Avg episode reward: [(0, '4.594')]
+[2024-11-07 15:59:53,467][14461] Updated weights for policy 0, policy_version 4280 (0.0023)
+[2024-11-07 15:59:54,928][14395] Fps is (10 sec: 7782.0, 60 sec: 6553.6, 300 sec: 5540.8). Total num frames: 17539072. Throughput: 0: 1574.4. Samples: 379488. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 15:59:54,932][14395] Avg episode reward: [(0, '4.520')]
+[2024-11-07 16:00:00,137][14395] Fps is (10 sec: 5616.5, 60 sec: 6258.6, 300 sec: 5510.8). Total num frames: 17559552. Throughput: 0: 1484.8. Samples: 385072. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 16:00:00,140][14395] Avg episode reward: [(0, '4.470')]
+[2024-11-07 16:00:01,768][14461] Updated weights for policy 0, policy_version 4290 (0.0040)
+[2024-11-07 16:00:04,927][14395] Fps is (10 sec: 4505.7, 60 sec: 6075.8, 300 sec: 5504.5). Total num frames: 17584128. Throughput: 0: 1506.7. Samples: 390696. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 16:00:04,930][14395] Avg episode reward: [(0, '4.333')]
+[2024-11-07 16:00:08,597][14461] Updated weights for policy 0, policy_version 4300 (0.0031)
+[2024-11-07 16:00:09,927][14395] Fps is (10 sec: 6275.8, 60 sec: 6076.0, 300 sec: 5536.7). Total num frames: 17620992. Throughput: 0: 1537.9. Samples: 399564. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:00:09,929][14395] Avg episode reward: [(0, '4.285')]
+[2024-11-07 16:00:14,927][14395] Fps is (10 sec: 6553.6, 60 sec: 6075.7, 300 sec: 5540.0). Total num frames: 17649664. Throughput: 0: 1631.6. Samples: 408654. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 16:00:14,929][14395] Avg episode reward: [(0, '4.383')]
+[2024-11-07 16:00:15,813][14461] Updated weights for policy 0, policy_version 4310 (0.0032)
+[2024-11-07 16:00:19,927][14395] Fps is (10 sec: 5324.8, 60 sec: 5939.3, 300 sec: 5641.1). Total num frames: 17674240. Throughput: 0: 1607.7. Samples: 412162. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 16:00:19,929][14395] Avg episode reward: [(0, '4.445')]
+[2024-11-07 16:00:22,655][14461] Updated weights for policy 0, policy_version 4320 (0.0036)
+[2024-11-07 16:00:24,927][14395] Fps is (10 sec: 5734.5, 60 sec: 6271.8, 300 sec: 5734.4). Total num frames: 17707008. Throughput: 0: 1570.3. Samples: 421674. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 16:00:24,932][14395] Avg episode reward: [(0, '4.288')]
+[2024-11-07 16:00:28,572][14461] Updated weights for policy 0, policy_version 4330 (0.0029)
+[2024-11-07 16:00:29,927][14395] Fps is (10 sec: 6963.0, 60 sec: 6485.3, 300 sec: 5762.2). Total num frames: 17743872. Throughput: 0: 1572.7. Samples: 432254. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:00:29,929][14395] Avg episode reward: [(0, '4.498')]
+[2024-11-07 16:00:34,927][14395] Fps is (10 sec: 5734.4, 60 sec: 6280.6, 300 sec: 5734.4). Total num frames: 17764352. Throughput: 0: 1546.8. Samples: 437286. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:00:34,929][14395] Avg episode reward: [(0, '4.555')]
+[2024-11-07 16:00:37,269][14461] Updated weights for policy 0, policy_version 4340 (0.0042)
+[2024-11-07 16:00:39,927][14395] Fps is (10 sec: 4096.1, 60 sec: 5939.2, 300 sec: 5734.4). Total num frames: 17784832. Throughput: 0: 1386.6. Samples: 441886. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:00:39,929][14395] Avg episode reward: [(0, '4.577')]
+[2024-11-07 16:00:44,927][14395] Fps is (10 sec: 4505.5, 60 sec: 5802.6, 300 sec: 5720.5). Total num frames: 17809408. Throughput: 0: 1422.1. Samples: 448766. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:00:44,929][14395] Avg episode reward: [(0, '4.496')]
+[2024-11-07 16:00:46,121][14461] Updated weights for policy 0, policy_version 4350 (0.0056)
+[2024-11-07 16:00:49,927][14395] Fps is (10 sec: 5734.4, 60 sec: 5666.1, 300 sec: 5706.6). Total num frames: 17842176. Throughput: 0: 1381.6. Samples: 452868. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:00:49,929][14395] Avg episode reward: [(0, '4.489')]
+[2024-11-07 16:00:52,000][14461] Updated weights for policy 0, policy_version 4360 (0.0030)
+[2024-11-07 16:00:54,927][14395] Fps is (10 sec: 6963.4, 60 sec: 5666.2, 300 sec: 5762.2). Total num frames: 17879040. Throughput: 0: 1432.7. Samples: 464034. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:00:54,929][14395] Avg episode reward: [(0, '4.361')]
+[2024-11-07 16:00:58,239][14461] Updated weights for policy 0, policy_version 4370 (0.0043)
+[2024-11-07 16:00:59,931][14395] Fps is (10 sec: 6550.9, 60 sec: 5822.6, 300 sec: 5762.1). Total num frames: 17907712. Throughput: 0: 1439.8. Samples: 473450. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:00:59,937][14395] Avg episode reward: [(0, '4.600')]
+[2024-11-07 16:00:59,955][14445] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004372_17907712.pth...
+[2024-11-07 16:01:00,145][14445] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004029_16502784.pth
+[2024-11-07 16:01:04,927][14395] Fps is (10 sec: 5324.7, 60 sec: 5802.7, 300 sec: 5706.6). Total num frames: 17932288. Throughput: 0: 1424.9. Samples: 476284. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 16:01:04,930][14395] Avg episode reward: [(0, '4.918')]
+[2024-11-07 16:01:04,934][14445] Saving new best policy, reward=4.918!
+[2024-11-07 16:01:06,342][14461] Updated weights for policy 0, policy_version 4380 (0.0053)
+[2024-11-07 16:01:09,927][14395] Fps is (10 sec: 3688.0, 60 sec: 5393.1, 300 sec: 5665.1). Total num frames: 17944576. Throughput: 0: 1347.9. Samples: 482328. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:01:09,929][14395] Avg episode reward: [(0, '4.684')]
+[2024-11-07 16:01:14,927][14395] Fps is (10 sec: 3686.4, 60 sec: 5324.8, 300 sec: 5692.8). Total num frames: 17969152. Throughput: 0: 1255.9. Samples: 488770. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:01:14,928][14395] Avg episode reward: [(0, '4.702')]
+[2024-11-07 16:01:16,923][14461] Updated weights for policy 0, policy_version 4390 (0.0039)
+[2024-11-07 16:01:19,927][14395] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5678.9). Total num frames: 17993728. Throughput: 0: 1229.5. Samples: 492612. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:01:19,929][14395] Avg episode reward: [(0, '4.364')]
+[2024-11-07 16:01:24,927][14395] Fps is (10 sec: 4915.3, 60 sec: 5188.3, 300 sec: 5637.2). Total num frames: 18018304. Throughput: 0: 1287.0. Samples: 499802. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:01:24,929][14395] Avg episode reward: [(0, '4.314')]
+[2024-11-07 16:01:25,040][14461] Updated weights for policy 0, policy_version 4400 (0.0054)
+[2024-11-07 16:01:29,928][14395] Fps is (10 sec: 5734.2, 60 sec: 5120.0, 300 sec: 5678.9). Total num frames: 18051072. Throughput: 0: 1352.4. Samples: 509622. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 16:01:29,931][14395] Avg episode reward: [(0, '4.397')]
+[2024-11-07 16:01:31,325][14461] Updated weights for policy 0, policy_version 4410 (0.0026)
+[2024-11-07 16:01:34,927][14395] Fps is (10 sec: 6553.6, 60 sec: 5324.8, 300 sec: 5651.1). Total num frames: 18083840. Throughput: 0: 1370.8. Samples: 514552. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2024-11-07 16:01:34,931][14395] Avg episode reward: [(0, '4.457')]
+[2024-11-07 16:01:37,833][14461] Updated weights for policy 0, policy_version 4420 (0.0036)
+[2024-11-07 16:01:39,927][14395] Fps is (10 sec: 6553.7, 60 sec: 5529.6, 300 sec: 5678.9). Total num frames: 18116608. Throughput: 0: 1324.4. Samples: 523632. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+[2024-11-07 16:01:39,929][14395] Avg episode reward: [(0, '4.292')]
+[2024-11-07 16:01:44,927][14395] Fps is (10 sec: 4915.2, 60 sec: 5393.1, 300 sec: 5665.0). Total num frames: 18132992. Throughput: 0: 1249.5. Samples: 529674. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 16:01:44,930][14395] Avg episode reward: [(0, '4.513')]
+[2024-11-07 16:01:46,301][14461] Updated weights for policy 0, policy_version 4430 (0.0022)
+[2024-11-07 16:01:49,927][14395] Fps is (10 sec: 4915.1, 60 sec: 5393.0, 300 sec: 5706.6). Total num frames: 18165760. Throughput: 0: 1301.3. Samples: 534842. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 16:01:49,929][14395] Avg episode reward: [(0, '4.579')]
+[2024-11-07 16:01:53,333][14461] Updated weights for policy 0, policy_version 4440 (0.0042)
+[2024-11-07 16:01:54,928][14395] Fps is (10 sec: 5733.9, 60 sec: 5188.2, 300 sec: 5720.5). Total num frames: 18190336. Throughput: 0: 1357.5. Samples: 543416. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:01:54,930][14395] Avg episode reward: [(0, '4.285')]
+[2024-11-07 16:01:59,270][14461] Updated weights for policy 0, policy_version 4450 (0.0025)
+[2024-11-07 16:01:59,927][14395] Fps is (10 sec: 6144.2, 60 sec: 5325.2, 300 sec: 5762.2). Total num frames: 18227200. Throughput: 0: 1444.0. Samples: 553752. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:01:59,928][14395] Avg episode reward: [(0, '4.199')]
+[2024-11-07 16:02:04,928][14395] Fps is (10 sec: 7373.1, 60 sec: 5529.6, 300 sec: 5831.6). Total num frames: 18264064. Throughput: 0: 1459.9. Samples: 558310. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 16:02:04,929][14395] Avg episode reward: [(0, '4.323')]
+[2024-11-07 16:02:05,171][14461] Updated weights for policy 0, policy_version 4460 (0.0035)
+[2024-11-07 16:02:09,927][14395] Fps is (10 sec: 6963.0, 60 sec: 5870.9, 300 sec: 5859.4). Total num frames: 18296832. Throughput: 0: 1556.9. Samples: 569862. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:02:09,930][14395] Avg episode reward: [(0, '4.337')]
+[2024-11-07 16:02:11,877][14461] Updated weights for policy 0, policy_version 4470 (0.0039)
+[2024-11-07 16:02:14,928][14395] Fps is (10 sec: 6553.6, 60 sec: 6007.4, 300 sec: 5901.2). Total num frames: 18329600. Throughput: 0: 1525.9. Samples: 578288. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 16:02:14,929][14395] Avg episode reward: [(0, '4.454')]
+[2024-11-07 16:02:19,928][14395] Fps is (10 sec: 4914.7, 60 sec: 5870.8, 300 sec: 5873.5). Total num frames: 18345984. Throughput: 0: 1478.4. Samples: 581084. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 16:02:19,930][14395] Avg episode reward: [(0, '4.500')]
+[2024-11-07 16:02:20,550][14461] Updated weights for policy 0, policy_version 4480 (0.0038)
+[2024-11-07 16:02:24,927][14395] Fps is (10 sec: 4915.3, 60 sec: 6007.4, 300 sec: 5901.1). Total num frames: 18378752. Throughput: 0: 1451.3. Samples: 588942. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:02:24,929][14395] Avg episode reward: [(0, '4.422')]
+[2024-11-07 16:02:26,971][14461] Updated weights for policy 0, policy_version 4490 (0.0040)
+[2024-11-07 16:02:29,928][14395] Fps is (10 sec: 6144.5, 60 sec: 5939.2, 300 sec: 5901.0). Total num frames: 18407424. Throughput: 0: 1530.0. Samples: 598524. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:02:29,932][14395] Avg episode reward: [(0, '4.494')]
+[2024-11-07 16:02:34,478][14461] Updated weights for policy 0, policy_version 4500 (0.0036)
+[2024-11-07 16:02:34,927][14395] Fps is (10 sec: 5324.8, 60 sec: 5802.7, 300 sec: 5914.9). Total num frames: 18432000. Throughput: 0: 1493.3. Samples: 602038. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-11-07 16:02:34,930][14395] Avg episode reward: [(0, '4.464')]
+[2024-11-07 16:02:39,928][14395] Fps is (10 sec: 4915.2, 60 sec: 5666.1, 300 sec: 5956.5). Total num frames: 18456576. Throughput: 0: 1452.6. Samples: 608784. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-11-07 16:02:39,930][14395] Avg episode reward: [(0, '4.351')]
+[2024-11-07 16:02:42,419][14461] Updated weights for policy 0, policy_version 4510 (0.0040)
+[2024-11-07 16:02:44,927][14395] Fps is (10 sec: 5324.8, 60 sec: 5870.9, 300 sec: 5928.8). Total num frames: 18485248. Throughput: 0: 1429.1. Samples: 618062. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-11-07 16:02:44,929][14395] Avg episode reward: [(0, '4.379')]
+[2024-11-07 16:02:49,091][14461] Updated weights for policy 0, policy_version 4520 (0.0051)
+[2024-11-07 16:02:49,927][14395] Fps is (10 sec: 6144.2, 60 sec: 5870.9, 300 sec: 5901.0). Total num frames: 18518016. Throughput: 0: 1435.6. Samples: 622912. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 16:02:49,929][14395] Avg episode reward: [(0, '4.579')]
+[2024-11-07 16:02:54,927][14395] Fps is (10 sec: 4915.2, 60 sec: 5734.5, 300 sec: 5845.5). Total num frames: 18534400. Throughput: 0: 1298.6. Samples: 628298. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 16:02:54,929][14395] Avg episode reward: [(0, '4.503')]
+[2024-11-07 16:02:58,299][14461] Updated weights for policy 0, policy_version 4530 (0.0038)
+[2024-11-07 16:02:59,932][14395] Fps is (10 sec: 4503.7, 60 sec: 5597.5, 300 sec: 5859.3). Total num frames: 18563072. Throughput: 0: 1309.3. Samples: 637212. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:02:59,939][14395] Avg episode reward: [(0, '4.345')]
+[2024-11-07 16:02:59,951][14445] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004532_18563072.pth...
+[2024-11-07 16:03:00,280][14445] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004195_17182720.pth
+[2024-11-07 16:03:04,571][14461] Updated weights for policy 0, policy_version 4540 (0.0023)
+[2024-11-07 16:03:04,927][14395] Fps is (10 sec: 6144.0, 60 sec: 5529.6, 300 sec: 5845.5). Total num frames: 18595840. Throughput: 0: 1357.9. Samples: 642186. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:03:04,940][14395] Avg episode reward: [(0, '4.329')]
+[2024-11-07 16:03:09,928][14395] Fps is (10 sec: 6146.5, 60 sec: 5461.3, 300 sec: 5895.9). Total num frames: 18624512. Throughput: 0: 1394.9. Samples: 651714. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 16:03:09,930][14395] Avg episode reward: [(0, '4.494')]
+[2024-11-07 16:03:11,780][14461] Updated weights for policy 0, policy_version 4550 (0.0036)
+[2024-11-07 16:03:14,927][14395] Fps is (10 sec: 6143.9, 60 sec: 5461.3, 300 sec: 5901.0). Total num frames: 18657280. Throughput: 0: 1379.3. Samples: 660592. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:03:14,930][14395] Avg episode reward: [(0, '4.480')]
+[2024-11-07 16:03:17,864][14461] Updated weights for policy 0, policy_version 4560 (0.0031)
+[2024-11-07 16:03:19,927][14395] Fps is (10 sec: 6144.1, 60 sec: 5666.2, 300 sec: 5887.1). Total num frames: 18685952. Throughput: 0: 1405.0. Samples: 665262. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-11-07 16:03:19,930][14395] Avg episode reward: [(0, '4.341')]
+[2024-11-07 16:03:27,180][14395] Fps is (10 sec: 4680.1, 60 sec: 5395.3, 300 sec: 5828.7). Total num frames: 18714624. Throughput: 0: 1375.1. Samples: 673762. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 16:03:27,196][14395] Avg episode reward: [(0, '4.461')]
+[2024-11-07 16:03:27,887][14461] Updated weights for policy 0, policy_version 4570 (0.0042)
+[2024-11-07 16:03:29,928][14395] Fps is (10 sec: 3686.0, 60 sec: 5256.5, 300 sec: 5762.1). Total num frames: 18722816. Throughput: 0: 1323.3. Samples: 677612. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-11-07 16:03:29,941][14395] Avg episode reward: [(0, '4.371')]
+[2024-11-07 16:03:34,927][14395] Fps is (10 sec: 4229.7, 60 sec: 5256.5, 300 sec: 5720.5). Total num frames: 18747392. Throughput: 0: 1290.2. Samples: 680972. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 16:03:34,929][14395] Avg episode reward: [(0, '4.341')]
+[2024-11-07 16:03:36,869][14461] Updated weights for policy 0, policy_version 4580 (0.0057)
+[2024-11-07 16:03:39,928][14395] Fps is (10 sec: 4915.4, 60 sec: 5256.5, 300 sec: 5692.7). Total num frames: 18771968. Throughput: 0: 1337.3. Samples: 688476. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 16:03:39,930][14395] Avg episode reward: [(0, '4.376')]
+[2024-11-07 16:03:44,774][14461] Updated weights for policy 0, policy_version 4590 (0.0065)
+[2024-11-07 16:03:44,928][14395] Fps is (10 sec: 5324.7, 60 sec: 5256.5, 300 sec: 5717.8). Total num frames: 18800640. Throughput: 0: 1316.3. Samples: 696440. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 16:03:44,930][14395] Avg episode reward: [(0, '4.450')]
+[2024-11-07 16:03:49,927][14395] Fps is (10 sec: 5325.2, 60 sec: 5120.0, 300 sec: 5692.8). Total num frames: 18825216. Throughput: 0: 1286.0. Samples: 700054. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:03:49,929][14395] Avg episode reward: [(0, '4.593')]
+[2024-11-07 16:03:52,136][14461] Updated weights for policy 0, policy_version 4600 (0.0042)
+[2024-11-07 16:03:54,927][14395] Fps is (10 sec: 5325.0, 60 sec: 5324.8, 300 sec: 5665.0). Total num frames: 18853888. Throughput: 0: 1271.2. Samples: 708916. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 16:03:54,930][14395] Avg episode reward: [(0, '4.477')]
+[2024-11-07 16:03:59,332][14461] Updated weights for policy 0, policy_version 4610 (0.0046)
+[2024-11-07 16:04:01,643][14395] Fps is (10 sec: 4894.5, 60 sec: 5177.1, 300 sec: 5604.6). Total num frames: 18882560. Throughput: 0: 1220.3. Samples: 717600. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:04:01,645][14395] Avg episode reward: [(0, '4.510')]
+[2024-11-07 16:04:04,927][14395] Fps is (10 sec: 4505.5, 60 sec: 5051.7, 300 sec: 5567.8). Total num frames: 18898944. Throughput: 0: 1185.0. Samples: 718586. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:04:04,935][14395] Avg episode reward: [(0, '4.586')]
+[2024-11-07 16:04:09,356][14461] Updated weights for policy 0, policy_version 4620 (0.0029)
+[2024-11-07 16:04:09,930][14395] Fps is (10 sec: 4942.8, 60 sec: 4983.3, 300 sec: 5553.8). Total num frames: 18923520. Throughput: 0: 1227.2. Samples: 726224. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:04:09,932][14395] Avg episode reward: [(0, '4.688')]
+[2024-11-07 16:04:14,927][14395] Fps is (10 sec: 5734.6, 60 sec: 4983.5, 300 sec: 5553.9). Total num frames: 18956288. Throughput: 0: 1282.8. Samples: 735336. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 16:04:14,928][14395] Avg episode reward: [(0, '4.624')]
+[2024-11-07 16:04:16,116][14461] Updated weights for policy 0, policy_version 4630 (0.0026)
+[2024-11-07 16:04:19,927][14395] Fps is (10 sec: 6145.6, 60 sec: 4983.5, 300 sec: 5606.4). Total num frames: 18984960. Throughput: 0: 1312.2. Samples: 740020. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 16:04:19,929][14395] Avg episode reward: [(0, '4.610')]
+[2024-11-07 16:04:22,497][14461] Updated weights for policy 0, policy_version 4640 (0.0031)
+[2024-11-07 16:04:24,927][14395] Fps is (10 sec: 6143.9, 60 sec: 5248.8, 300 sec: 5637.2). Total num frames: 19017728. Throughput: 0: 1357.9. Samples: 749580. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-11-07 16:04:24,931][14395] Avg episode reward: [(0, '4.613')]
+[2024-11-07 16:04:29,009][14461] Updated weights for policy 0, policy_version 4650 (0.0034)
+[2024-11-07 16:04:29,932][14395] Fps is (10 sec: 6550.6, 60 sec: 5461.0, 300 sec: 5637.1). Total num frames: 19050496. Throughput: 0: 1386.9. Samples: 758856. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-11-07 16:04:29,934][14395] Avg episode reward: [(0, '4.465')]
+[2024-11-07 16:04:36,334][14395] Fps is (10 sec: 5027.1, 60 sec: 5336.2, 300 sec: 5555.2). Total num frames: 19075072. Throughput: 0: 1359.5. Samples: 763146. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-07 16:04:36,336][14395] Avg episode reward: [(0, '4.551')]
+[2024-11-07 16:04:38,320][14461] Updated weights for policy 0, policy_version 4660 (0.0038)
+[2024-11-07 16:04:39,927][14395] Fps is (10 sec: 4507.7, 60 sec: 5393.1, 300 sec: 5540.0). Total num frames: 19095552. Throughput: 0: 1327.1. Samples: 768636. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
9258 |
+
[2024-11-07 16:03:36,869][14461] Updated weights for policy 0, policy_version 4580 (0.0057)
|
9259 |
+
[2024-11-07 16:03:39,928][14395] Fps is (10 sec: 4915.4, 60 sec: 5256.5, 300 sec: 5692.7). Total num frames: 18771968. Throughput: 0: 1337.3. Samples: 688476. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
9260 |
+
[2024-11-07 16:03:39,930][14395] Avg episode reward: [(0, '4.376')]
|
9261 |
+
[2024-11-07 16:03:44,774][14461] Updated weights for policy 0, policy_version 4590 (0.0065)
|
9262 |
+
[2024-11-07 16:03:44,928][14395] Fps is (10 sec: 5324.7, 60 sec: 5256.5, 300 sec: 5717.8). Total num frames: 18800640. Throughput: 0: 1316.3. Samples: 696440. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
9263 |
+
[2024-11-07 16:03:44,930][14395] Avg episode reward: [(0, '4.450')]
|
9264 |
+
[2024-11-07 16:03:49,927][14395] Fps is (10 sec: 5325.2, 60 sec: 5120.0, 300 sec: 5692.8). Total num frames: 18825216. Throughput: 0: 1286.0. Samples: 700054. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
9265 |
+
[2024-11-07 16:03:49,929][14395] Avg episode reward: [(0, '4.593')]
|
9266 |
+
[2024-11-07 16:03:52,136][14461] Updated weights for policy 0, policy_version 4600 (0.0042)
|
9267 |
+
[2024-11-07 16:03:54,927][14395] Fps is (10 sec: 5325.0, 60 sec: 5324.8, 300 sec: 5665.0). Total num frames: 18853888. Throughput: 0: 1271.2. Samples: 708916. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
9268 |
+
[2024-11-07 16:03:54,930][14395] Avg episode reward: [(0, '4.477')]
|
9269 |
+
[2024-11-07 16:03:59,332][14461] Updated weights for policy 0, policy_version 4610 (0.0046)
|
9270 |
+
[2024-11-07 16:04:01,643][14395] Fps is (10 sec: 4894.5, 60 sec: 5177.1, 300 sec: 5604.6). Total num frames: 18882560. Throughput: 0: 1220.3. Samples: 717600. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
9271 |
+
[2024-11-07 16:04:01,645][14395] Avg episode reward: [(0, '4.510')]
|
9272 |
+
[2024-11-07 16:04:04,927][14395] Fps is (10 sec: 4505.5, 60 sec: 5051.7, 300 sec: 5567.8). Total num frames: 18898944. Throughput: 0: 1185.0. Samples: 718586. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
9273 |
+
[2024-11-07 16:04:04,935][14395] Avg episode reward: [(0, '4.586')]
|
9274 |
+
[2024-11-07 16:04:09,356][14461] Updated weights for policy 0, policy_version 4620 (0.0029)
|
9275 |
+
[2024-11-07 16:04:09,930][14395] Fps is (10 sec: 4942.8, 60 sec: 4983.3, 300 sec: 5553.8). Total num frames: 18923520. Throughput: 0: 1227.2. Samples: 726224. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
9276 |
+
[2024-11-07 16:04:09,932][14395] Avg episode reward: [(0, '4.688')]
|
9277 |
+
[2024-11-07 16:04:14,927][14395] Fps is (10 sec: 5734.6, 60 sec: 4983.5, 300 sec: 5553.9). Total num frames: 18956288. Throughput: 0: 1282.8. Samples: 735336. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
9278 |
+
[2024-11-07 16:04:14,928][14395] Avg episode reward: [(0, '4.624')]
|
9279 |
+
[2024-11-07 16:04:16,116][14461] Updated weights for policy 0, policy_version 4630 (0.0026)
|
9280 |
+
[2024-11-07 16:04:19,927][14395] Fps is (10 sec: 6145.6, 60 sec: 4983.5, 300 sec: 5606.4). Total num frames: 18984960. Throughput: 0: 1312.2. Samples: 740020. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
9281 |
+
[2024-11-07 16:04:19,929][14395] Avg episode reward: [(0, '4.610')]
|
9282 |
+
[2024-11-07 16:04:22,497][14461] Updated weights for policy 0, policy_version 4640 (0.0031)
|
9283 |
+
[2024-11-07 16:04:24,927][14395] Fps is (10 sec: 6143.9, 60 sec: 5248.8, 300 sec: 5637.2). Total num frames: 19017728. Throughput: 0: 1357.9. Samples: 749580. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
9284 |
+
[2024-11-07 16:04:24,931][14395] Avg episode reward: [(0, '4.613')]
|
9285 |
+
[2024-11-07 16:04:29,009][14461] Updated weights for policy 0, policy_version 4650 (0.0034)
|
9286 |
+
[2024-11-07 16:04:29,932][14395] Fps is (10 sec: 6550.6, 60 sec: 5461.0, 300 sec: 5637.1). Total num frames: 19050496. Throughput: 0: 1386.9. Samples: 758856. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
9287 |
+
[2024-11-07 16:04:29,934][14395] Avg episode reward: [(0, '4.465')]
|
9288 |
+
[2024-11-07 16:04:36,334][14395] Fps is (10 sec: 5027.1, 60 sec: 5336.2, 300 sec: 5555.2). Total num frames: 19075072. Throughput: 0: 1359.5. Samples: 763146. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
9289 |
+
[2024-11-07 16:04:36,336][14395] Avg episode reward: [(0, '4.551')]
|
9290 |
+
[2024-11-07 16:04:38,320][14461] Updated weights for policy 0, policy_version 4660 (0.0038)
|
9291 |
+
[2024-11-07 16:04:39,927][14395] Fps is (10 sec: 4507.7, 60 sec: 5393.1, 300 sec: 5540.0). Total num frames: 19095552. Throughput: 0: 1327.1. Samples: 768636. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
9292 |
+
[2024-11-07 16:04:39,930][14395] Avg episode reward: [(0, '4.531')]
|
9293 |
+
[2024-11-07 16:04:44,927][14395] Fps is (10 sec: 5719.9, 60 sec: 5393.1, 300 sec: 5498.4). Total num frames: 19124224. Throughput: 0: 1389.3. Samples: 777736. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
9294 |
+
[2024-11-07 16:04:44,930][14395] Avg episode reward: [(0, '4.333')]
|
9295 |
+
[2024-11-07 16:04:45,079][14461] Updated weights for policy 0, policy_version 4670 (0.0038)
|
9296 |
+
[2024-11-07 16:04:49,927][14395] Fps is (10 sec: 6144.0, 60 sec: 5529.6, 300 sec: 5484.5). Total num frames: 19156992. Throughput: 0: 1415.9. Samples: 782302. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
9297 |
+
[2024-11-07 16:04:49,929][14395] Avg episode reward: [(0, '4.411')]
|
9298 |
+
[2024-11-07 16:04:52,097][14461] Updated weights for policy 0, policy_version 4680 (0.0032)
|
9299 |
+
[2024-11-07 16:04:54,928][14395] Fps is (10 sec: 5734.2, 60 sec: 5461.3, 300 sec: 5502.3). Total num frames: 19181568. Throughput: 0: 1438.7. Samples: 790964. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
9300 |
+
[2024-11-07 16:04:54,929][14395] Avg episode reward: [(0, '4.371')]
|
9301 |
+
[2024-11-07 16:04:58,806][14461] Updated weights for policy 0, policy_version 4690 (0.0029)
|
9302 |
+
[2024-11-07 16:04:59,929][14395] Fps is (10 sec: 5733.3, 60 sec: 5692.2, 300 sec: 5526.1). Total num frames: 19214336. Throughput: 0: 1440.9. Samples: 800180. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
9303 |
+
[2024-11-07 16:04:59,932][14395] Avg episode reward: [(0, '4.705')]
|
9304 |
+
[2024-11-07 16:04:59,948][14445] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004691_19214336.pth...
|
9305 |
+
[2024-11-07 16:05:00,105][14445] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004372_17907712.pth
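(The save/remove pairs above are the trainer's checkpoint rotation: each new save evicts the oldest file, apparently keeping a fixed number of recent checkpoints alongside the best-reward snapshot. A quick way to inspect what is retained at any point, using this run's paths:

    ls -t /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/
)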
[2024-11-07 16:05:04,927][14395] Fps is (10 sec: 6144.2, 60 sec: 5734.4, 300 sec: 5498.4). Total num frames: 19243008. Throughput: 0: 1419.7. Samples: 803908. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2024-11-07 16:05:04,930][14395] Avg episode reward: [(0, '4.499')]
[2024-11-07 16:05:06,089][14461] Updated weights for policy 0, policy_version 4700 (0.0041)
[2024-11-07 16:05:10,766][14395] Fps is (10 sec: 4535.6, 60 sec: 5588.2, 300 sec: 5455.1). Total num frames: 19263488. Throughput: 0: 1385.6. Samples: 813096. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2024-11-07 16:05:10,772][14395] Avg episode reward: [(0, '4.577')]
[2024-11-07 16:05:14,927][14395] Fps is (10 sec: 4505.6, 60 sec: 5529.6, 300 sec: 5470.6). Total num frames: 19288064. Throughput: 0: 1329.8. Samples: 818690. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2024-11-07 16:05:14,930][14395] Avg episode reward: [(0, '4.508')]
[2024-11-07 16:05:15,214][14461] Updated weights for policy 0, policy_version 4710 (0.0036)
[2024-11-07 16:05:19,927][14395] Fps is (10 sec: 5365.2, 60 sec: 5461.3, 300 sec: 5442.8). Total num frames: 19312640. Throughput: 0: 1343.5. Samples: 821712. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
[2024-11-07 16:05:19,937][14395] Avg episode reward: [(0, '4.623')]
[2024-11-07 16:05:23,759][14461] Updated weights for policy 0, policy_version 4720 (0.0068)
[2024-11-07 16:05:24,927][14395] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5401.2). Total num frames: 19337216. Throughput: 0: 1355.1. Samples: 829614. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2024-11-07 16:05:24,929][14395] Avg episode reward: [(0, '4.623')]
[2024-11-07 16:05:29,927][14395] Fps is (10 sec: 4915.1, 60 sec: 5188.6, 300 sec: 5415.0). Total num frames: 19361792. Throughput: 0: 1304.9. Samples: 836456. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2024-11-07 16:05:29,931][14395] Avg episode reward: [(0, '4.453')]
[2024-11-07 16:05:31,546][14461] Updated weights for policy 0, policy_version 4730 (0.0066)
[2024-11-07 16:05:34,927][14395] Fps is (10 sec: 5734.4, 60 sec: 5452.7, 300 sec: 5456.7). Total num frames: 19394560. Throughput: 0: 1324.0. Samples: 841884. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2024-11-07 16:05:34,930][14395] Avg episode reward: [(0, '4.237')]
[2024-11-07 16:05:37,528][14461] Updated weights for policy 0, policy_version 4740 (0.0035)
[2024-11-07 16:05:39,927][14395] Fps is (10 sec: 6963.4, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 19431424. Throughput: 0: 1362.9. Samples: 852294. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2024-11-07 16:05:39,929][14395] Avg episode reward: [(0, '4.496')]
[2024-11-07 16:05:42,481][14461] Updated weights for policy 0, policy_version 4750 (0.0028)
[2024-11-07 16:05:45,234][14395] Fps is (10 sec: 5961.3, 60 sec: 5501.5, 300 sec: 5464.9). Total num frames: 19456000. Throughput: 0: 1287.3. Samples: 858502. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2024-11-07 16:05:45,236][14395] Avg episode reward: [(0, '4.379')]
[2024-11-07 16:05:49,850][14461] Updated weights for policy 0, policy_version 4760 (0.0026)
[2024-11-07 16:05:49,927][14395] Fps is (10 sec: 6553.6, 60 sec: 5666.1, 300 sec: 5484.5). Total num frames: 19496960. Throughput: 0: 1380.0. Samples: 866006. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2024-11-07 16:05:49,929][14395] Avg episode reward: [(0, '4.544')]
[2024-11-07 16:05:54,927][14395] Fps is (10 sec: 8028.4, 60 sec: 5871.0, 300 sec: 5512.3). Total num frames: 19533824. Throughput: 0: 1476.0. Samples: 878278. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2024-11-07 16:05:54,929][14395] Avg episode reward: [(0, '4.481')]
[2024-11-07 16:05:54,982][14461] Updated weights for policy 0, policy_version 4770 (0.0025)
[2024-11-07 16:05:59,923][14461] Updated weights for policy 0, policy_version 4780 (0.0029)
[2024-11-07 16:05:59,927][14395] Fps is (10 sec: 8192.0, 60 sec: 6075.9, 300 sec: 5581.7). Total num frames: 19578880. Throughput: 0: 1594.6. Samples: 890448. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2024-11-07 16:05:59,929][14395] Avg episode reward: [(0, '4.536')]
[2024-11-07 16:06:04,927][14395] Fps is (10 sec: 7782.5, 60 sec: 6144.0, 300 sec: 5651.1). Total num frames: 19611648. Throughput: 0: 1644.9. Samples: 895734. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2024-11-07 16:06:04,928][14395] Avg episode reward: [(0, '4.282')]
[2024-11-07 16:06:05,438][14461] Updated weights for policy 0, policy_version 4790 (0.0027)
[2024-11-07 16:06:09,927][14395] Fps is (10 sec: 7372.8, 60 sec: 6577.3, 300 sec: 5706.6). Total num frames: 19652608. Throughput: 0: 1738.2. Samples: 907832. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2024-11-07 16:06:09,930][14395] Avg episode reward: [(0, '4.543')]
[2024-11-07 16:06:10,827][14461] Updated weights for policy 0, policy_version 4800 (0.0028)
[2024-11-07 16:06:14,927][14395] Fps is (10 sec: 7782.3, 60 sec: 6690.1, 300 sec: 5748.3). Total num frames: 19689472. Throughput: 0: 1832.5. Samples: 918920. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2024-11-07 16:06:14,932][14395] Avg episode reward: [(0, '4.297')]
[2024-11-07 16:06:16,033][14461] Updated weights for policy 0, policy_version 4810 (0.0029)
[2024-11-07 16:06:19,927][14395] Fps is (10 sec: 6143.9, 60 sec: 6690.1, 300 sec: 5748.3). Total num frames: 19714048. Throughput: 0: 1850.5. Samples: 925158. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
[2024-11-07 16:06:19,931][14395] Avg episode reward: [(0, '4.377')]
[2024-11-07 16:06:23,171][14461] Updated weights for policy 0, policy_version 4820 (0.0031)
[2024-11-07 16:06:24,927][14395] Fps is (10 sec: 6553.6, 60 sec: 6963.2, 300 sec: 5776.1). Total num frames: 19755008. Throughput: 0: 1792.7. Samples: 932966. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
[2024-11-07 16:06:24,929][14395] Avg episode reward: [(0, '4.330')]
[2024-11-07 16:06:28,033][14461] Updated weights for policy 0, policy_version 4830 (0.0030)
[2024-11-07 16:06:29,927][14395] Fps is (10 sec: 8192.1, 60 sec: 7236.3, 300 sec: 5803.8). Total num frames: 19795968. Throughput: 0: 1945.3. Samples: 945444. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2024-11-07 16:06:29,929][14395] Avg episode reward: [(0, '4.467')]
[2024-11-07 16:06:33,086][14461] Updated weights for policy 0, policy_version 4840 (0.0030)
[2024-11-07 16:06:34,927][14395] Fps is (10 sec: 8192.0, 60 sec: 7372.8, 300 sec: 5831.6). Total num frames: 19836928. Throughput: 0: 1899.5. Samples: 951484. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2024-11-07 16:06:34,929][14395] Avg episode reward: [(0, '4.250')]
[2024-11-07 16:06:37,870][14461] Updated weights for policy 0, policy_version 4850 (0.0034)
[2024-11-07 16:06:39,927][14395] Fps is (10 sec: 8192.1, 60 sec: 7441.1, 300 sec: 5914.9). Total num frames: 19877888. Throughput: 0: 1907.3. Samples: 964104. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2024-11-07 16:06:39,928][14395] Avg episode reward: [(0, '4.500')]
[2024-11-07 16:06:43,599][14461] Updated weights for policy 0, policy_version 4860 (0.0025)
[2024-11-07 16:06:44,927][14395] Fps is (10 sec: 7782.4, 60 sec: 7685.1, 300 sec: 5928.8). Total num frames: 19914752. Throughput: 0: 1879.1. Samples: 975008. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2024-11-07 16:06:44,930][14395] Avg episode reward: [(0, '4.327')]
[2024-11-07 16:06:49,088][14461] Updated weights for policy 0, policy_version 4870 (0.0028)
[2024-11-07 16:06:49,927][14395] Fps is (10 sec: 7372.8, 60 sec: 7577.6, 300 sec: 5970.5). Total num frames: 19951616. Throughput: 0: 1880.6. Samples: 980362. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2024-11-07 16:06:49,929][14395] Avg episode reward: [(0, '4.475')]
[2024-11-07 16:06:54,927][14395] Fps is (10 sec: 6144.0, 60 sec: 7372.8, 300 sec: 5928.8). Total num frames: 19976192. Throughput: 0: 1814.1. Samples: 989466. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2024-11-07 16:06:54,930][14395] Avg episode reward: [(0, '4.451')]
[2024-11-07 16:06:56,409][14461] Updated weights for policy 0, policy_version 4880 (0.0033)
[2024-11-07 16:06:58,350][14445] Stopping Batcher_0...
[2024-11-07 16:06:58,350][14395] Component Batcher_0 stopped!
[2024-11-07 16:06:58,352][14445] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth...
[2024-11-07 16:06:58,355][14445] Loop batcher_evt_loop terminating...
[2024-11-07 16:06:58,434][14461] Weights refcount: 2 0
[2024-11-07 16:06:58,440][14461] Stopping InferenceWorker_p0-w0...
[2024-11-07 16:06:58,441][14461] Loop inference_proc0-0_evt_loop terminating...
[2024-11-07 16:06:58,440][14395] Component InferenceWorker_p0-w0 stopped!
[2024-11-07 16:06:58,493][14395] Component RolloutWorker_w3 stopped!
[2024-11-07 16:06:58,495][14468] Stopping RolloutWorker_w3...
[2024-11-07 16:06:58,501][14468] Loop rollout_proc3_evt_loop terminating...
[2024-11-07 16:06:58,516][14445] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004532_18563072.pth
[2024-11-07 16:06:58,521][14395] Component RolloutWorker_w4 stopped!
[2024-11-07 16:06:58,526][14445] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth...
[2024-11-07 16:06:58,531][14477] Stopping RolloutWorker_w4...
[2024-11-07 16:06:58,535][14477] Loop rollout_proc4_evt_loop terminating...
[2024-11-07 16:06:58,540][14395] Component RolloutWorker_w1 stopped!
[2024-11-07 16:06:58,543][14467] Stopping RolloutWorker_w1...
[2024-11-07 16:06:58,554][14467] Loop rollout_proc1_evt_loop terminating...
[2024-11-07 16:06:58,556][14395] Component RolloutWorker_w0 stopped!
[2024-11-07 16:06:58,556][14462] Stopping RolloutWorker_w0...
[2024-11-07 16:06:58,564][14462] Loop rollout_proc0_evt_loop terminating...
[2024-11-07 16:06:58,608][14470] Stopping RolloutWorker_w6...
[2024-11-07 16:06:58,608][14395] Component RolloutWorker_w6 stopped!
[2024-11-07 16:06:58,609][14470] Loop rollout_proc6_evt_loop terminating...
[2024-11-07 16:06:58,728][14445] Stopping LearnerWorker_p0...
[2024-11-07 16:06:58,728][14445] Loop learner_proc0_evt_loop terminating...
[2024-11-07 16:06:58,728][14395] Component LearnerWorker_p0 stopped!
[2024-11-07 16:06:58,738][14395] Component RolloutWorker_w7 stopped!
[2024-11-07 16:06:58,740][14469] Stopping RolloutWorker_w7...
[2024-11-07 16:06:58,825][14469] Loop rollout_proc7_evt_loop terminating...
[2024-11-07 16:06:59,006][14395] Component RolloutWorker_w2 stopped!
[2024-11-07 16:06:59,004][14466] Stopping RolloutWorker_w2...
[2024-11-07 16:06:59,007][14466] Loop rollout_proc2_evt_loop terminating...
[2024-11-07 16:06:59,405][14395] Component RolloutWorker_w5 stopped!
[2024-11-07 16:06:59,412][14395] Waiting for process learner_proc0 to stop...
[2024-11-07 16:06:59,416][14478] Stopping RolloutWorker_w5...
[2024-11-07 16:06:59,424][14478] Loop rollout_proc5_evt_loop terminating...
[2024-11-07 16:07:02,027][14395] Waiting for process inference_proc0-0 to join...
[2024-11-07 16:07:02,029][14395] Waiting for process rollout_proc0 to join...
[2024-11-07 16:07:02,030][14395] Waiting for process rollout_proc1 to join...
[2024-11-07 16:07:02,031][14395] Waiting for process rollout_proc2 to join...
[2024-11-07 16:07:02,033][14395] Waiting for process rollout_proc3 to join...
[2024-11-07 16:07:02,035][14395] Waiting for process rollout_proc4 to join...
[2024-11-07 16:07:02,038][14395] Waiting for process rollout_proc5 to join...
[2024-11-07 16:07:02,040][14395] Waiting for process rollout_proc6 to join...
[2024-11-07 16:07:02,043][14395] Waiting for process rollout_proc7 to join...
[2024-11-07 16:07:02,046][14395] Batcher 0 profile tree view:
batching: 32.3104, releasing_batches: 0.0488
[2024-11-07 16:07:02,048][14395] InferenceWorker_p0-w0 profile tree view:
wait_policy: 0.0004
wait_policy_total: 9.8218
update_model: 10.4168
weight_update: 0.0027
one_step: 0.0077
handle_policy_step: 648.0918
deserialize: 19.6855, stack: 3.1513, obs_to_device_normalize: 190.8703, forward: 275.0630, send_messages: 49.3024
prepare_outputs: 88.3335
to_cpu: 66.1440
[2024-11-07 16:07:02,050][14395] Learner 0 profile tree view:
misc: 0.0066, prepare_batch: 31.8707
train: 129.8584
epoch_init: 0.0102, minibatch_init: 0.0110, losses_postprocess: 3.2725, kl_divergence: 1.3944, after_optimizer: 4.4368
calculate_losses: 43.1480
losses_init: 0.0066, forward_head: 4.2252, bptt_initial: 29.1641, tail: 1.5559, advantages_returns: 0.3835, losses: 3.9513
bptt: 3.5262
bptt_forward_core: 3.3479
update: 76.8025
clip: 1.5711
[2024-11-07 16:07:02,052][14395] RolloutWorker_w0 profile tree view:
wait_for_trajectories: 0.2704, enqueue_policy_requests: 17.1022, env_step: 199.2890, overhead: 15.8391, complete_rollouts: 0.7729
save_policy_outputs: 23.6707
split_output_tensors: 8.6823
[2024-11-07 16:07:02,054][14395] RolloutWorker_w7 profile tree view:
wait_for_trajectories: 0.2330, enqueue_policy_requests: 15.3368, env_step: 303.0272, overhead: 17.1887, complete_rollouts: 0.5677
save_policy_outputs: 18.8034
split_output_tensors: 6.3821
[2024-11-07 16:07:02,056][14395] Loop Runner_EvtLoop terminating...
[2024-11-07 16:07:02,058][14395] Runner profile tree view:
main_loop: 717.3057
[2024-11-07 16:07:02,062][14395] Collected {0: 20004864}, FPS: 5561.8
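(The evaluation pass that follows is Sample Factory's standard "enjoy" run over the trained checkpoint. A minimal sketch of the invocation, reconstructed from the overridden and added arguments logged below; the sf_examples.vizdoom.enjoy_vizdoom entry point is an assumption and the local script may differ:

    python -m sf_examples.vizdoom.enjoy_vizdoom \
        --env=doom_health_gathering_supreme \
        --train_dir=/root/hfRL/ml/LunarLander-v2/train_dir \
        --experiment=default_experiment \
        --num_workers=1 --no_render --save_video --max_num_episodes=10
)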
[2024-11-07 16:07:04,057][14395] Loading existing experiment configuration from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json
[2024-11-07 16:07:04,058][14395] Overriding arg 'num_workers' with value 1 passed from command line
[2024-11-07 16:07:04,060][14395] Adding new argument 'no_render'=True that is not in the saved config file!
[2024-11-07 16:07:04,063][14395] Adding new argument 'save_video'=True that is not in the saved config file!
[2024-11-07 16:07:04,064][14395] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
[2024-11-07 16:07:04,066][14395] Adding new argument 'video_name'=None that is not in the saved config file!
[2024-11-07 16:07:04,068][14395] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
[2024-11-07 16:07:04,069][14395] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
[2024-11-07 16:07:04,071][14395] Adding new argument 'push_to_hub'=False that is not in the saved config file!
[2024-11-07 16:07:04,073][14395] Adding new argument 'hf_repository'=None that is not in the saved config file!
[2024-11-07 16:07:04,076][14395] Adding new argument 'policy_index'=0 that is not in the saved config file!
[2024-11-07 16:07:04,078][14395] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
[2024-11-07 16:07:04,079][14395] Adding new argument 'train_script'=None that is not in the saved config file!
[2024-11-07 16:07:04,080][14395] Adding new argument 'enjoy_script'=None that is not in the saved config file!
[2024-11-07 16:07:04,081][14395] Using frameskip 1 and render_action_repeat=4 for evaluation
[2024-11-07 16:07:04,121][14395] Doom resolution: 160x120, resize resolution: (128, 72)
[2024-11-07 16:07:04,125][14395] RunningMeanStd input shape: (3, 72, 128)
[2024-11-07 16:07:04,128][14395] RunningMeanStd input shape: (1,)
[2024-11-07 16:07:04,150][14395] ConvEncoder: input_channels=3
[2024-11-07 16:07:04,283][14395] Conv encoder output size: 512
[2024-11-07 16:07:04,285][14395] Policy head output size: 512
[2024-11-07 16:07:05,321][14395] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth...
[2024-11-07 16:07:06,188][14395] Num frames 100...
[2024-11-07 16:07:06,422][14395] Num frames 200...
[2024-11-07 16:07:06,617][14395] Num frames 300...
[2024-11-07 16:07:06,844][14395] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
[2024-11-07 16:07:06,845][14395] Avg episode reward: 3.840, avg true_objective: 3.840
[2024-11-07 16:07:06,884][14395] Num frames 400...
[2024-11-07 16:07:07,096][14395] Num frames 500...
[2024-11-07 16:07:07,280][14395] Num frames 600...
[2024-11-07 16:07:07,482][14395] Num frames 700...
[2024-11-07 16:07:07,675][14395] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
[2024-11-07 16:07:07,677][14395] Avg episode reward: 3.840, avg true_objective: 3.840
[2024-11-07 16:07:07,748][14395] Num frames 800...
[2024-11-07 16:07:07,932][14395] Num frames 900...
[2024-11-07 16:07:08,126][14395] Num frames 1000...
[2024-11-07 16:07:08,328][14395] Num frames 1100...
[2024-11-07 16:07:08,555][14395] Avg episode rewards: #0: 4.280, true rewards: #0: 3.947
[2024-11-07 16:07:08,556][14395] Avg episode reward: 4.280, avg true_objective: 3.947
[2024-11-07 16:07:08,592][14395] Num frames 1200...
[2024-11-07 16:07:08,781][14395] Num frames 1300...
[2024-11-07 16:07:08,969][14395] Num frames 1400...
[2024-11-07 16:07:09,158][14395] Num frames 1500...
[2024-11-07 16:07:09,362][14395] Avg episode rewards: #0: 4.170, true rewards: #0: 3.920
[2024-11-07 16:07:09,364][14395] Avg episode reward: 4.170, avg true_objective: 3.920
[2024-11-07 16:07:09,437][14395] Num frames 1600...
[2024-11-07 16:07:09,623][14395] Num frames 1700...
[2024-11-07 16:07:09,824][14395] Num frames 1800...
[2024-11-07 16:07:10,015][14395] Num frames 1900...
[2024-11-07 16:07:10,204][14395] Num frames 2000...
[2024-11-07 16:07:10,297][14395] Avg episode rewards: #0: 4.432, true rewards: #0: 4.032
[2024-11-07 16:07:10,298][14395] Avg episode reward: 4.432, avg true_objective: 4.032
[2024-11-07 16:07:10,613][14395] Num frames 2100...
[2024-11-07 16:07:10,776][14395] Num frames 2200...
[2024-11-07 16:07:10,931][14395] Num frames 2300...
[2024-11-07 16:07:11,083][14395] Num frames 2400...
[2024-11-07 16:07:11,136][14395] Avg episode rewards: #0: 4.333, true rewards: #0: 4.000
[2024-11-07 16:07:11,141][14395] Avg episode reward: 4.333, avg true_objective: 4.000
[2024-11-07 16:07:11,310][14395] Num frames 2500...
[2024-11-07 16:07:11,467][14395] Num frames 2600...
[2024-11-07 16:07:11,629][14395] Num frames 2700...
[2024-11-07 16:07:11,821][14395] Avg episode rewards: #0: 4.263, true rewards: #0: 3.977
[2024-11-07 16:07:11,823][14395] Avg episode reward: 4.263, avg true_objective: 3.977
[2024-11-07 16:07:11,861][14395] Num frames 2800...
[2024-11-07 16:07:12,057][14395] Num frames 2900...
[2024-11-07 16:07:12,242][14395] Num frames 3000...
[2024-11-07 16:07:12,403][14395] Num frames 3100...
[2024-11-07 16:07:12,565][14395] Avg episode rewards: #0: 4.210, true rewards: #0: 3.960
[2024-11-07 16:07:12,568][14395] Avg episode reward: 4.210, avg true_objective: 3.960
[2024-11-07 16:07:12,641][14395] Num frames 3200...
[2024-11-07 16:07:12,801][14395] Num frames 3300...
[2024-11-07 16:07:12,954][14395] Num frames 3400...
[2024-11-07 16:07:13,121][14395] Num frames 3500...
[2024-11-07 16:07:13,265][14395] Avg episode rewards: #0: 4.169, true rewards: #0: 3.947
[2024-11-07 16:07:13,266][14395] Avg episode reward: 4.169, avg true_objective: 3.947
[2024-11-07 16:07:13,351][14395] Num frames 3600...
[2024-11-07 16:07:13,513][14395] Num frames 3700...
[2024-11-07 16:07:13,670][14395] Num frames 3800...
[2024-11-07 16:07:13,825][14395] Num frames 3900...
[2024-11-07 16:07:13,938][14395] Avg episode rewards: #0: 4.136, true rewards: #0: 3.936
[2024-11-07 16:07:13,942][14395] Avg episode reward: 4.136, avg true_objective: 3.936
[2024-11-07 16:07:21,728][14395] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
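(The enjoy run is then repeated with hub upload enabled, which regenerates the replay and pushes it together with the latest checkpoint. A sketch under the same assumptions as above, using the arguments logged below:

    python -m sf_examples.vizdoom.enjoy_vizdoom \
        --env=doom_health_gathering_supreme \
        --train_dir=/root/hfRL/ml/LunarLander-v2/train_dir \
        --experiment=default_experiment \
        --num_workers=1 --no_render --save_video \
        --max_num_episodes=10 --max_num_frames=100000 \
        --push_to_hub --hf_repository=alidenewade/rl_course_vizdoom_health_gathering_supreme
)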
[2024-11-07 16:07:24,406][14395] Loading existing experiment configuration from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json
[2024-11-07 16:07:24,407][14395] Overriding arg 'num_workers' with value 1 passed from command line
[2024-11-07 16:07:24,408][14395] Adding new argument 'no_render'=True that is not in the saved config file!
[2024-11-07 16:07:24,410][14395] Adding new argument 'save_video'=True that is not in the saved config file!
[2024-11-07 16:07:24,410][14395] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
[2024-11-07 16:07:24,412][14395] Adding new argument 'video_name'=None that is not in the saved config file!
[2024-11-07 16:07:24,414][14395] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
[2024-11-07 16:07:24,415][14395] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
[2024-11-07 16:07:24,417][14395] Adding new argument 'push_to_hub'=True that is not in the saved config file!
[2024-11-07 16:07:24,419][14395] Adding new argument 'hf_repository'='alidenewade/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
[2024-11-07 16:07:24,420][14395] Adding new argument 'policy_index'=0 that is not in the saved config file!
[2024-11-07 16:07:24,422][14395] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
[2024-11-07 16:07:24,423][14395] Adding new argument 'train_script'=None that is not in the saved config file!
[2024-11-07 16:07:24,424][14395] Adding new argument 'enjoy_script'=None that is not in the saved config file!
[2024-11-07 16:07:24,426][14395] Using frameskip 1 and render_action_repeat=4 for evaluation
[2024-11-07 16:07:24,453][14395] RunningMeanStd input shape: (3, 72, 128)
[2024-11-07 16:07:24,455][14395] RunningMeanStd input shape: (1,)
[2024-11-07 16:07:24,467][14395] ConvEncoder: input_channels=3
[2024-11-07 16:07:24,506][14395] Conv encoder output size: 512
[2024-11-07 16:07:24,508][14395] Policy head output size: 512
[2024-11-07 16:07:24,534][14395] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth...
[2024-11-07 16:07:25,029][14395] Num frames 100...
[2024-11-07 16:07:25,225][14395] Num frames 200...
[2024-11-07 16:07:25,408][14395] Num frames 300...
[2024-11-07 16:07:25,589][14395] Num frames 400...
[2024-11-07 16:07:25,732][14395] Avg episode rewards: #0: 5.480, true rewards: #0: 4.480
[2024-11-07 16:07:25,735][14395] Avg episode reward: 5.480, avg true_objective: 4.480
[2024-11-07 16:07:25,849][14395] Num frames 500...
[2024-11-07 16:07:26,042][14395] Num frames 600...
[2024-11-07 16:07:26,234][14395] Num frames 700...
[2024-11-07 16:07:28,651][14395] Num frames 800...
[2024-11-07 16:07:28,777][14395] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
[2024-11-07 16:07:28,780][14395] Avg episode reward: 4.660, avg true_objective: 4.160
[2024-11-07 16:07:28,970][14395] Num frames 900...
[2024-11-07 16:07:29,166][14395] Num frames 1000...
[2024-11-07 16:07:29,376][14395] Num frames 1100...
[2024-11-07 16:07:29,567][14395] Num frames 1200...
[2024-11-07 16:07:29,644][14395] Avg episode rewards: #0: 4.367, true rewards: #0: 4.033
[2024-11-07 16:07:29,645][14395] Avg episode reward: 4.367, avg true_objective: 4.033
[2024-11-07 16:07:29,827][14395] Num frames 1300...
[2024-11-07 16:07:30,024][14395] Num frames 1400...
[2024-11-07 16:07:30,223][14395] Num frames 1500...
[2024-11-07 16:07:30,454][14395] Avg episode rewards: #0: 4.235, true rewards: #0: 3.985
[2024-11-07 16:07:30,458][14395] Avg episode reward: 4.235, avg true_objective: 3.985
[2024-11-07 16:07:30,487][14395] Num frames 1600...
[2024-11-07 16:07:30,674][14395] Num frames 1700...
[2024-11-07 16:07:30,864][14395] Num frames 1800...
[2024-11-07 16:07:31,053][14395] Num frames 1900...
[2024-11-07 16:07:31,263][14395] Avg episode rewards: #0: 4.156, true rewards: #0: 3.956
[2024-11-07 16:07:31,266][14395] Avg episode reward: 4.156, avg true_objective: 3.956
[2024-11-07 16:07:31,328][14395] Num frames 2000...
[2024-11-07 16:07:31,507][14395] Num frames 2100...
[2024-11-07 16:07:31,687][14395] Num frames 2200...
[2024-11-07 16:07:31,875][14395] Num frames 2300...
[2024-11-07 16:07:32,054][14395] Num frames 2400...
[2024-11-07 16:07:32,158][14395] Avg episode rewards: #0: 4.377, true rewards: #0: 4.043
[2024-11-07 16:07:32,160][14395] Avg episode reward: 4.377, avg true_objective: 4.043
[2024-11-07 16:07:32,315][14395] Num frames 2500...
[2024-11-07 16:07:32,504][14395] Num frames 2600...
[2024-11-07 16:07:32,692][14395] Num frames 2700...
[2024-11-07 16:07:32,883][14395] Num frames 2800...
[2024-11-07 16:07:32,959][14395] Avg episode rewards: #0: 4.300, true rewards: #0: 4.014
[2024-11-07 16:07:32,961][14395] Avg episode reward: 4.300, avg true_objective: 4.014
[2024-11-07 16:07:33,137][14395] Num frames 2900...
[2024-11-07 16:07:33,331][14395] Num frames 3000...
[2024-11-07 16:07:33,528][14395] Num frames 3100...
[2024-11-07 16:07:33,770][14395] Avg episode rewards: #0: 4.243, true rewards: #0: 3.992
[2024-11-07 16:07:33,771][14395] Avg episode reward: 4.243, avg true_objective: 3.992
[2024-11-07 16:07:33,782][14395] Num frames 3200...
[2024-11-07 16:07:33,993][14395] Num frames 3300...
[2024-11-07 16:07:34,194][14395] Num frames 3400...
[2024-11-07 16:07:34,400][14395] Num frames 3500...
[2024-11-07 16:07:34,611][14395] Avg episode rewards: #0: 4.198, true rewards: #0: 3.976
[2024-11-07 16:07:34,612][14395] Avg episode reward: 4.198, avg true_objective: 3.976
[2024-11-07 16:07:34,657][14395] Num frames 3600...
[2024-11-07 16:07:34,862][14395] Num frames 3700...
[2024-11-07 16:07:35,065][14395] Num frames 3800...
[2024-11-07 16:07:35,201][14395] Avg episode rewards: #0: 4.034, true rewards: #0: 3.834
[2024-11-07 16:07:35,203][14395] Avg episode reward: 4.034, avg true_objective: 3.834
[2024-11-07 16:07:42,638][14395] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
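(Once the upload finishes, the experiment can be pulled back from the Hub with Sample Factory's helper module; a minimal sketch, with the module path and flags as documented for sample-factory 2.x and assumed current here:

    python -m sample_factory.huggingface.load_from_hub \
        -r alidenewade/rl_course_vizdoom_health_gathering_supreme \
        -d ./train_dir
)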