alidenewade committed
Commit 07af252 · verified · 1 Parent(s): d2bd787

Upload folder using huggingface_hub

.summary/0/events.out.tfevents.1730987699.ali ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:87059ecbdc1670d0818d006cee4eb4c1c8f0c91d0d12d9116f813c80be81e325
+ size 375709
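The three lines stored for this file are a Git LFS pointer, not the TensorBoard event data itself; the Hub resolves the pointer to the real 375,709-byte binary on download. A minimal sketch of fetching the resolved file with huggingface_hub (the repo id is taken from the push URL recorded later in sf_log.txt):

```python
# Sketch: resolve the LFS pointer above to the actual TensorBoard event file.
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="alidenewade/rl_course_vizdoom_health_gathering_supreme",
    filename=".summary/0/events.out.tfevents.1730987699.ali",
)
print(local_path)  # local cache path of the resolved event file
```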
README.md CHANGED
@@ -15,7 +15,7 @@ model-index:
       type: doom_health_gathering_supreme
     metrics:
     - type: mean_reward
-      value: 3.81 +/- 0.46
+      value: 3.83 +/- 0.50
       name: mean_reward
       verified: false
 ---
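The metric bump from 3.81 +/- 0.46 to 3.83 +/- 0.50 is the mean and standard deviation of episodic reward over the evaluation episodes. A sketch of how such a value is produced (the reward list below is illustrative, not the actual evaluation data):

```python
import numpy as np

# Hypothetical per-episode returns from an evaluation run.
episode_rewards = np.array([3.52, 4.34, 4.45, 3.81, 4.07, 3.41, 3.90, 4.12, 3.66, 3.02])

# Model-card style "value: mean +/- std".
print(f"value: {episode_rewards.mean():.2f} +/- {episode_rewards.std():.2f}")
```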
checkpoint_p0/best_000004378_17932288_reward_4.918.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ecda5f7116bb9e8b82a8f5a81375c46de9759c64337027aa765684d78f80e243
+ size 34929243
checkpoint_p0/checkpoint_000004691_19214336.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b197e2a9b541c78f571b125dc9e75b3d5088da50bc45baf2aa8ecffbed78f48b
+ size 34929669
checkpoint_p0/checkpoint_000004884_20004864.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ddd5a144c2fb8ea19a7a0d46d6c5d64863e32ea3dc3a02d29d19a7fd0afcc009
+ size 34929669
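Each of these .pth files is a full Sample Factory checkpoint (LFS-stored, roughly 35 MB each). A hedged way to peek inside one after downloading; the key names in the comment are what Sample Factory checkpoints typically contain and should be treated as assumptions:

```python
# Sketch: inspect a downloaded checkpoint's top-level contents.
import torch

ckpt = torch.load(
    "checkpoint_p0/checkpoint_000004884_20004864.pth",
    map_location="cpu",  # no GPU needed just to inspect
)
print(sorted(ckpt.keys()))  # expected keys like 'model', 'optimizer', 'train_step', 'env_steps'
```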
config.json CHANGED
@@ -15,7 +15,7 @@
     "worker_num_splits": 2,
     "policy_workers_per_policy": 1,
     "max_policy_lag": 1000,
-    "num_workers": 10,
+    "num_workers": 8,
     "num_envs_per_worker": 4,
     "batch_size": 1024,
     "num_batches_per_epoch": 1,
@@ -65,7 +65,7 @@
     "summaries_use_frameskip": true,
     "heartbeat_interval": 20,
     "heartbeat_reporting_interval": 600,
-    "train_for_env_steps": 4000000,
+    "train_for_env_steps": 20000000,
     "train_for_seconds": 10000000000,
     "save_every_sec": 120,
     "keep_checkpoints": 2,
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:37f9723db5bf8d58e69570838f86321d5b61c9daef0e0eed219cccfba4dcdbc0
- size 5588709
+ oid sha256:90598894c4dea90f4e243e09fa039d69af671c31d0bd0e712e25ff87a80bafba
+ size 5731726
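replay.mp4 was regenerated after the continued run, hence the new oid and slightly larger size. A hedged sketch of the evaluation/recording step, again assuming the standard sf_examples entry point and flags:

```python
# Sketch: evaluate the latest checkpoint and write a fresh replay.mp4.
import subprocess

subprocess.run([
    "python", "-m", "sf_examples.vizdoom.enjoy_vizdoom",
    "--env=doom_health_gathering_supreme",
    "--train_dir=train_dir",
    "--experiment=default_experiment",
    "--max_num_episodes=10",
    "--save_video",
    "--no_render",
], check=True)
```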
sf_log.txt CHANGED
@@ -8824,3 +8824,790 @@ main_loop: 43.0008
  [2024-11-07 15:41:09,303][04584] Avg episode rewards: #0: 4.008, true rewards: #0: 3.808
  [2024-11-07 15:41:09,305][04584] Avg episode reward: 4.008, avg true_objective: 3.808
  [2024-11-07 15:41:24,061][04584] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
+ [2024-11-07 15:41:33,571][04584] The model has been pushed to https://huggingface.co/alidenewade/rl_course_vizdoom_health_gathering_supreme
+ [2024-11-07 15:55:04,579][14395] Saving configuration to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json...
+ [2024-11-07 15:55:04,580][14395] Rollout worker 0 uses device cpu
+ [2024-11-07 15:55:04,583][14395] Rollout worker 1 uses device cpu
+ [2024-11-07 15:55:04,586][14395] Rollout worker 2 uses device cpu
+ [2024-11-07 15:55:04,590][14395] Rollout worker 3 uses device cpu
+ [2024-11-07 15:55:04,592][14395] Rollout worker 4 uses device cpu
+ [2024-11-07 15:55:04,594][14395] Rollout worker 5 uses device cpu
+ [2024-11-07 15:55:04,597][14395] Rollout worker 6 uses device cpu
+ [2024-11-07 15:55:04,599][14395] Rollout worker 7 uses device cpu
+ [2024-11-07 15:55:04,705][14395] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2024-11-07 15:55:04,709][14395] InferenceWorker_p0-w0: min num requests: 2
+ [2024-11-07 15:55:04,753][14395] Starting all processes...
+ [2024-11-07 15:55:04,756][14395] Starting process learner_proc0
+ [2024-11-07 15:55:04,934][14395] Starting all processes...
+ [2024-11-07 15:55:05,018][14395] Starting process inference_proc0-0
+ [2024-11-07 15:55:05,019][14395] Starting process rollout_proc0
+ [2024-11-07 15:55:05,020][14395] Starting process rollout_proc1
+ [2024-11-07 15:55:05,020][14395] Starting process rollout_proc2
+ [2024-11-07 15:55:05,021][14395] Starting process rollout_proc3
+ [2024-11-07 15:55:05,021][14395] Starting process rollout_proc4
+ [2024-11-07 15:55:05,022][14395] Starting process rollout_proc5
+ [2024-11-07 15:55:05,022][14395] Starting process rollout_proc6
+ [2024-11-07 15:55:05,023][14395] Starting process rollout_proc7
+ [2024-11-07 15:55:11,298][14445] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2024-11-07 15:55:11,313][14445] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+ [2024-11-07 15:55:11,489][14477] Worker 4 uses CPU cores [4]
+ [2024-11-07 15:55:11,561][14466] Worker 2 uses CPU cores [2]
+ [2024-11-07 15:55:12,164][14445] Num visible devices: 1
+ [2024-11-07 15:55:12,223][14445] Starting seed is not provided
+ [2024-11-07 15:55:12,223][14445] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2024-11-07 15:55:12,223][14445] Initializing actor-critic model on device cuda:0
+ [2024-11-07 15:55:12,224][14445] RunningMeanStd input shape: (3, 72, 128)
+ [2024-11-07 15:55:12,226][14445] RunningMeanStd input shape: (1,)
+ [2024-11-07 15:55:12,331][14445] ConvEncoder: input_channels=3
+ [2024-11-07 15:55:12,727][14468] Worker 3 uses CPU cores [3]
+ [2024-11-07 15:55:12,770][14445] Conv encoder output size: 512
+ [2024-11-07 15:55:12,771][14445] Policy head output size: 512
+ [2024-11-07 15:55:12,806][14445] Created Actor Critic model with architecture:
+ [2024-11-07 15:55:12,807][14445] ActorCriticSharedWeights(
+   (obs_normalizer): ObservationNormalizer(
+     (running_mean_std): RunningMeanStdDictInPlace(
+       (running_mean_std): ModuleDict(
+         (obs): RunningMeanStdInPlace()
+       )
+     )
+   )
+   (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+   (encoder): VizdoomEncoder(
+     (basic_encoder): ConvEncoder(
+       (enc): RecursiveScriptModule(
+         original_name=ConvEncoderImpl
+         (conv_head): RecursiveScriptModule(
+           original_name=Sequential
+           (0): RecursiveScriptModule(original_name=Conv2d)
+           (1): RecursiveScriptModule(original_name=ELU)
+           (2): RecursiveScriptModule(original_name=Conv2d)
+           (3): RecursiveScriptModule(original_name=ELU)
+           (4): RecursiveScriptModule(original_name=Conv2d)
+           (5): RecursiveScriptModule(original_name=ELU)
+         )
+         (mlp_layers): RecursiveScriptModule(
+           original_name=Sequential
+           (0): RecursiveScriptModule(original_name=Linear)
+           (1): RecursiveScriptModule(original_name=ELU)
+         )
+       )
+     )
+   )
+   (core): ModelCoreRNN(
+     (core): GRU(512, 512)
+   )
+   (decoder): MlpDecoder(
+     (mlp): Identity()
+   )
+   (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+   (action_parameterization): ActionParameterizationDefault(
+     (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+   )
+ )
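The module tree above fixes the network shape: a three-layer ELU conv encoder on 3x72x128 observations feeding a 512-unit linear layer, a GRU(512, 512) core, a scalar critic head, and a 5-way action head. A minimal PyTorch sketch with the same shapes (the conv kernel sizes and strides are assumptions based on Sample Factory's default VizDoom encoder, not read from the log):

```python
import torch
from torch import nn

class ActorCriticSketch(nn.Module):
    """Shape-compatible sketch of the logged ActorCriticSharedWeights model."""

    def __init__(self, num_actions: int = 5):
        super().__init__()
        # Assumed Sample Factory default conv stack: [32,8,4], [64,4,2], [128,3,2].
        self.conv_head = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=8, stride=4), nn.ELU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ELU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=2), nn.ELU(),
        )
        with torch.no_grad():  # infer flattened conv output size for 72x128 input
            n = self.conv_head(torch.zeros(1, 3, 72, 128)).flatten(1).shape[1]
        self.mlp_layers = nn.Sequential(nn.Linear(n, 512), nn.ELU())  # encoder output: 512
        self.core = nn.GRU(512, 512)                                  # recurrent core
        self.critic_linear = nn.Linear(512, 1)                        # value head
        self.distribution_linear = nn.Linear(512, num_actions)        # action logits

    def forward(self, obs, rnn_state=None):
        x = self.mlp_layers(self.conv_head(obs).flatten(1))
        x, rnn_state = self.core(x.unsqueeze(0), rnn_state)
        x = x.squeeze(0)
        return self.distribution_linear(x), self.critic_linear(x), rnn_state
```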
+ [2024-11-07 15:55:13,345][14469] Worker 7 uses CPU cores [0, 1, 2, 3, 4, 5, 6]
+ [2024-11-07 15:55:13,552][14467] Worker 1 uses CPU cores [1]
+ [2024-11-07 15:55:13,584][14462] Worker 0 uses CPU cores [0]
+ [2024-11-07 15:55:13,653][14461] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2024-11-07 15:55:13,654][14461] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+ [2024-11-07 15:55:13,687][14461] Num visible devices: 1
+ [2024-11-07 15:55:14,018][14478] Worker 5 uses CPU cores [5]
+ [2024-11-07 15:55:14,479][14470] Worker 6 uses CPU cores [6]
+ [2024-11-07 15:55:14,948][14445] Using optimizer <class 'torch.optim.adam.Adam'>
+ [2024-11-07 15:55:16,722][14445] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003910_16015360.pth...
+ [2024-11-07 15:55:16,819][14445] Loading model from checkpoint
+ [2024-11-07 15:55:16,822][14445] Loaded experiment state at self.train_step=3910, self.env_steps=16015360
+ [2024-11-07 15:55:16,822][14445] Initialized policy 0 weights for model version 3910
+ [2024-11-07 15:55:16,834][14445] LearnerWorker_p0 finished initialization!
+ [2024-11-07 15:55:16,834][14445] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+ [2024-11-07 15:55:17,090][14461] RunningMeanStd input shape: (3, 72, 128)
+ [2024-11-07 15:55:17,092][14461] RunningMeanStd input shape: (1,)
+ [2024-11-07 15:55:17,104][14461] ConvEncoder: input_channels=3
+ [2024-11-07 15:55:17,233][14461] Conv encoder output size: 512
+ [2024-11-07 15:55:17,234][14461] Policy head output size: 512
+ [2024-11-07 15:55:17,302][14395] Inference worker 0-0 is ready!
+ [2024-11-07 15:55:17,303][14395] All inference workers are ready! Signal rollout workers to start!
+ [2024-11-07 15:55:17,409][14477] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2024-11-07 15:55:17,415][14468] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2024-11-07 15:55:17,439][14466] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2024-11-07 15:55:17,467][14467] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2024-11-07 15:55:17,478][14462] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2024-11-07 15:55:17,512][14478] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2024-11-07 15:55:17,571][14470] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2024-11-07 15:55:17,576][14469] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2024-11-07 15:55:18,028][14468] Decorrelating experience for 0 frames...
+ [2024-11-07 15:55:18,032][14477] Decorrelating experience for 0 frames...
+ [2024-11-07 15:55:18,032][14466] Decorrelating experience for 0 frames...
+ [2024-11-07 15:55:18,261][14467] Decorrelating experience for 0 frames...
+ [2024-11-07 15:55:18,314][14462] Decorrelating experience for 0 frames...
+ [2024-11-07 15:55:18,449][14469] Decorrelating experience for 0 frames...
+ [2024-11-07 15:55:18,583][14477] Decorrelating experience for 32 frames...
+ [2024-11-07 15:55:18,825][14468] Decorrelating experience for 32 frames...
+ [2024-11-07 15:55:18,828][14467] Decorrelating experience for 32 frames...
+ [2024-11-07 15:55:18,837][14462] Decorrelating experience for 32 frames...
+ [2024-11-07 15:55:18,977][14478] Decorrelating experience for 0 frames...
+ [2024-11-07 15:55:19,335][14477] Decorrelating experience for 64 frames...
+ [2024-11-07 15:55:19,454][14478] Decorrelating experience for 32 frames...
+ [2024-11-07 15:55:19,456][14466] Decorrelating experience for 32 frames...
+ [2024-11-07 15:55:19,552][14462] Decorrelating experience for 64 frames...
+ [2024-11-07 15:55:19,814][14477] Decorrelating experience for 96 frames...
+ [2024-11-07 15:55:19,889][14470] Decorrelating experience for 0 frames...
+ [2024-11-07 15:55:19,910][14467] Decorrelating experience for 64 frames...
+ [2024-11-07 15:55:19,927][14395] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 16015360. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+ [2024-11-07 15:55:20,253][14462] Decorrelating experience for 96 frames...
+ [2024-11-07 15:55:20,461][14478] Decorrelating experience for 64 frames...
+ [2024-11-07 15:55:20,464][14466] Decorrelating experience for 64 frames...
+ [2024-11-07 15:55:20,466][14469] Decorrelating experience for 32 frames...
+ [2024-11-07 15:55:20,646][14470] Decorrelating experience for 32 frames...
+ [2024-11-07 15:55:20,674][14467] Decorrelating experience for 96 frames...
+ [2024-11-07 15:55:21,007][14468] Decorrelating experience for 64 frames...
+ [2024-11-07 15:55:21,032][14466] Decorrelating experience for 96 frames...
+ [2024-11-07 15:55:21,394][14478] Decorrelating experience for 96 frames...
+ [2024-11-07 15:55:21,694][14468] Decorrelating experience for 96 frames...
+ [2024-11-07 15:55:21,701][14469] Decorrelating experience for 64 frames...
+ [2024-11-07 15:55:21,913][14470] Decorrelating experience for 64 frames...
+ [2024-11-07 15:55:22,539][14469] Decorrelating experience for 96 frames...
+ [2024-11-07 15:55:23,773][14445] Signal inference workers to stop experience collection...
+ [2024-11-07 15:55:23,794][14461] InferenceWorker_p0-w0: stopping experience collection
+ [2024-11-07 15:55:23,856][14470] Decorrelating experience for 96 frames...
+ [2024-11-07 15:55:25,856][14395] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 16015360. Throughput: 0: 338.7. Samples: 2008. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+ [2024-11-07 15:55:25,860][14395] Avg episode reward: [(0, '2.242')]
+ [2024-11-07 15:55:25,865][14395] Heartbeat connected on RolloutWorker_w1
+ [2024-11-07 15:55:25,866][14395] Heartbeat connected on InferenceWorker_p0-w0
+ [2024-11-07 15:55:25,869][14395] Heartbeat connected on RolloutWorker_w6
+ [2024-11-07 15:55:25,870][14395] Heartbeat connected on RolloutWorker_w7
+ [2024-11-07 15:55:25,872][14395] Heartbeat connected on RolloutWorker_w2
+ [2024-11-07 15:55:25,873][14395] Heartbeat connected on RolloutWorker_w0
+ [2024-11-07 15:55:25,876][14395] Heartbeat connected on Batcher_0
+ [2024-11-07 15:55:25,877][14395] Heartbeat connected on RolloutWorker_w5
+ [2024-11-07 15:55:25,883][14395] Heartbeat connected on RolloutWorker_w3
+ [2024-11-07 15:55:25,889][14395] Heartbeat connected on RolloutWorker_w4
+ [2024-11-07 15:55:29,927][14395] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 16015360. Throughput: 0: 247.8. Samples: 2478. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+ [2024-11-07 15:55:29,929][14395] Avg episode reward: [(0, '2.242')]
+ [2024-11-07 15:55:30,500][14445] Signal inference workers to resume experience collection...
+ [2024-11-07 15:55:30,501][14461] InferenceWorker_p0-w0: resuming experience collection
+ [2024-11-07 15:55:31,048][14395] Heartbeat connected on LearnerWorker_p0
+ [2024-11-07 15:55:34,927][14395] Fps is (10 sec: 3160.8, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 16044032. Throughput: 0: 313.5. Samples: 4702. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:55:34,933][14395] Avg episode reward: [(0, '3.523')]
+ [2024-11-07 15:55:36,987][14461] Updated weights for policy 0, policy_version 3920 (0.0055)
+ [2024-11-07 15:55:39,928][14395] Fps is (10 sec: 5734.1, 60 sec: 2867.2, 300 sec: 2867.2). Total num frames: 16072704. Throughput: 0: 644.5. Samples: 12890. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 15:55:39,934][14395] Avg episode reward: [(0, '4.344')]
+ [2024-11-07 15:55:44,929][14395] Fps is (10 sec: 4914.8, 60 sec: 3112.9, 300 sec: 3112.9). Total num frames: 16093184. Throughput: 0: 794.9. Samples: 19872. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 15:55:44,931][14395] Avg episode reward: [(0, '4.448')]
+ [2024-11-07 15:55:45,034][14461] Updated weights for policy 0, policy_version 3930 (0.0028)
+ [2024-11-07 15:55:49,927][14395] Fps is (10 sec: 4915.4, 60 sec: 3549.9, 300 sec: 3549.9). Total num frames: 16121856. Throughput: 0: 820.3. Samples: 24610. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 15:55:49,929][14395] Avg episode reward: [(0, '4.447')]
+ [2024-11-07 15:55:52,269][14461] Updated weights for policy 0, policy_version 3940 (0.0045)
+ [2024-11-07 15:55:54,927][14395] Fps is (10 sec: 6554.4, 60 sec: 4096.0, 300 sec: 4096.0). Total num frames: 16158720. Throughput: 0: 957.8. Samples: 33524. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:55:54,930][14395] Avg episode reward: [(0, '4.553')]
+ [2024-11-07 15:55:58,004][14461] Updated weights for policy 0, policy_version 3950 (0.0031)
+ [2024-11-07 15:55:59,928][14395] Fps is (10 sec: 5734.2, 60 sec: 4096.0, 300 sec: 4096.0). Total num frames: 16179200. Throughput: 0: 1050.7. Samples: 42028. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:55:59,934][14395] Avg episode reward: [(0, '4.512')]
+ [2024-11-07 15:56:04,928][14395] Fps is (10 sec: 4915.1, 60 sec: 4278.1, 300 sec: 4278.1). Total num frames: 16207872. Throughput: 0: 1004.8. Samples: 45216. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 15:56:04,930][14395] Avg episode reward: [(0, '4.356')]
+ [2024-11-07 15:56:06,269][14461] Updated weights for policy 0, policy_version 3960 (0.0029)
+ [2024-11-07 15:56:09,927][14395] Fps is (10 sec: 6963.4, 60 sec: 4669.5, 300 sec: 4669.5). Total num frames: 16248832. Throughput: 0: 1221.1. Samples: 55822. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 15:56:09,929][14395] Avg episode reward: [(0, '4.361')]
+ [2024-11-07 15:56:11,796][14461] Updated weights for policy 0, policy_version 3970 (0.0027)
+ [2024-11-07 15:56:14,936][14395] Fps is (10 sec: 6548.2, 60 sec: 4691.1, 300 sec: 4691.1). Total num frames: 16273408. Throughput: 0: 1383.7. Samples: 64754. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 15:56:14,939][14395] Avg episode reward: [(0, '4.444')]
+ [2024-11-07 15:56:19,928][14395] Fps is (10 sec: 4095.7, 60 sec: 4573.8, 300 sec: 4573.8). Total num frames: 16289792. Throughput: 0: 1394.7. Samples: 67462. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:56:19,931][14395] Avg episode reward: [(0, '4.359')]
+ [2024-11-07 15:56:21,825][14461] Updated weights for policy 0, policy_version 3980 (0.0043)
+ [2024-11-07 15:56:24,927][14395] Fps is (10 sec: 4509.4, 60 sec: 5131.2, 300 sec: 4663.2). Total num frames: 16318464. Throughput: 0: 1371.5. Samples: 74606. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:56:24,932][14395] Avg episode reward: [(0, '4.307')]
+ [2024-11-07 15:56:27,554][14461] Updated weights for policy 0, policy_version 3990 (0.0023)
+ [2024-11-07 15:56:29,928][14395] Fps is (10 sec: 6553.7, 60 sec: 5666.1, 300 sec: 4856.7). Total num frames: 16355328. Throughput: 0: 1448.9. Samples: 85070. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:56:29,931][14395] Avg episode reward: [(0, '4.548')]
+ [2024-11-07 15:56:34,927][14395] Fps is (10 sec: 5734.3, 60 sec: 5529.6, 300 sec: 4806.0). Total num frames: 16375808. Throughput: 0: 1439.1. Samples: 89368. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2024-11-07 15:56:34,929][14395] Avg episode reward: [(0, '4.400')]
+ [2024-11-07 15:56:35,600][14461] Updated weights for policy 0, policy_version 4000 (0.0043)
+ [2024-11-07 15:56:39,927][14395] Fps is (10 sec: 6144.3, 60 sec: 5734.4, 300 sec: 5017.6). Total num frames: 16416768. Throughput: 0: 1447.8. Samples: 98676. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+ [2024-11-07 15:56:39,929][14395] Avg episode reward: [(0, '4.556')]
+ [2024-11-07 15:56:40,909][14461] Updated weights for policy 0, policy_version 4010 (0.0028)
+ [2024-11-07 15:56:44,927][14395] Fps is (10 sec: 6553.6, 60 sec: 5802.8, 300 sec: 5011.6). Total num frames: 16441344. Throughput: 0: 1453.7. Samples: 107446. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 15:56:44,940][14395] Avg episode reward: [(0, '4.731')]
+ [2024-11-07 15:56:49,927][14395] Fps is (10 sec: 4505.5, 60 sec: 5666.1, 300 sec: 4960.7). Total num frames: 16461824. Throughput: 0: 1449.9. Samples: 110462. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:56:49,930][14395] Avg episode reward: [(0, '4.555')]
+ [2024-11-07 15:56:50,788][14461] Updated weights for policy 0, policy_version 4020 (0.0026)
+ [2024-11-07 15:56:54,928][14395] Fps is (10 sec: 4095.9, 60 sec: 5393.0, 300 sec: 4915.2). Total num frames: 16482304. Throughput: 0: 1338.9. Samples: 116072. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:56:54,932][14395] Avg episode reward: [(0, '4.357')]
+ [2024-11-07 15:56:59,928][14395] Fps is (10 sec: 4095.9, 60 sec: 5393.1, 300 sec: 4874.2). Total num frames: 16502784. Throughput: 0: 1269.0. Samples: 121850. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:56:59,932][14395] Avg episode reward: [(0, '4.413')]
+ [2024-11-07 15:56:59,977][14445] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004029_16502784.pth...
+ [2024-11-07 15:57:00,368][14445] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003908_16007168.pth
+ [2024-11-07 15:57:00,908][14461] Updated weights for policy 0, policy_version 4030 (0.0067)
+ [2024-11-07 15:57:04,928][14395] Fps is (10 sec: 4505.4, 60 sec: 5324.7, 300 sec: 4876.2). Total num frames: 16527360. Throughput: 0: 1281.3. Samples: 125120. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 15:57:04,931][14395] Avg episode reward: [(0, '4.501')]
+ [2024-11-07 15:57:09,928][14395] Fps is (10 sec: 4095.6, 60 sec: 4915.1, 300 sec: 4803.4). Total num frames: 16543744. Throughput: 0: 1247.7. Samples: 130756. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:57:09,931][14395] Avg episode reward: [(0, '4.539')]
+ [2024-11-07 15:57:10,937][14461] Updated weights for policy 0, policy_version 4040 (0.0040)
+ [2024-11-07 15:57:14,927][14395] Fps is (10 sec: 4096.3, 60 sec: 4915.9, 300 sec: 4808.4). Total num frames: 16568320. Throughput: 0: 1192.9. Samples: 138748. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:57:14,930][14395] Avg episode reward: [(0, '4.564')]
+ [2024-11-07 15:57:19,301][14461] Updated weights for policy 0, policy_version 4050 (0.0059)
+ [2024-11-07 15:57:19,935][14395] Fps is (10 sec: 4502.5, 60 sec: 4982.8, 300 sec: 4778.3). Total num frames: 16588800. Throughput: 0: 1185.9. Samples: 142742. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 15:57:19,938][14395] Avg episode reward: [(0, '4.485')]
+ [2024-11-07 15:57:24,940][14395] Fps is (10 sec: 4499.7, 60 sec: 4914.1, 300 sec: 4783.6). Total num frames: 16613376. Throughput: 0: 1116.7. Samples: 148942. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:57:24,942][14395] Avg episode reward: [(0, '4.436')]
+ [2024-11-07 15:57:27,985][14461] Updated weights for policy 0, policy_version 4060 (0.0038)
+ [2024-11-07 15:57:29,930][14395] Fps is (10 sec: 4917.8, 60 sec: 4710.2, 300 sec: 4789.1). Total num frames: 16637952. Throughput: 0: 1088.2. Samples: 156418. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 15:57:29,933][14395] Avg episode reward: [(0, '4.518')]
+ [2024-11-07 15:57:34,927][14395] Fps is (10 sec: 5331.8, 60 sec: 4846.9, 300 sec: 4824.2). Total num frames: 16666624. Throughput: 0: 1114.8. Samples: 160630. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:57:34,936][14395] Avg episode reward: [(0, '4.508')]
+ [2024-11-07 15:57:35,266][14461] Updated weights for policy 0, policy_version 4070 (0.0031)
+ [2024-11-07 15:57:39,927][14395] Fps is (10 sec: 4916.6, 60 sec: 4505.6, 300 sec: 4798.2). Total num frames: 16687104. Throughput: 0: 1131.8. Samples: 167002. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:57:39,931][14395] Avg episode reward: [(0, '4.463')]
+ [2024-11-07 15:57:44,927][14395] Fps is (10 sec: 3276.8, 60 sec: 4300.8, 300 sec: 4717.5). Total num frames: 16699392. Throughput: 0: 1109.5. Samples: 171778. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 15:57:44,931][14395] Avg episode reward: [(0, '4.400')]
+ [2024-11-07 15:57:46,388][14461] Updated weights for policy 0, policy_version 4080 (0.0058)
+ [2024-11-07 15:57:49,927][14395] Fps is (10 sec: 4915.3, 60 sec: 4573.9, 300 sec: 4806.0). Total num frames: 16736256. Throughput: 0: 1139.0. Samples: 176372. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:57:49,929][14395] Avg episode reward: [(0, '4.383')]
+ [2024-11-07 15:57:51,689][14461] Updated weights for policy 0, policy_version 4090 (0.0024)
+ [2024-11-07 15:57:54,927][14395] Fps is (10 sec: 7782.4, 60 sec: 4915.2, 300 sec: 4915.2). Total num frames: 16777216. Throughput: 0: 1275.9. Samples: 188170. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 15:57:54,929][14395] Avg episode reward: [(0, '4.469')]
+ [2024-11-07 15:57:57,147][14461] Updated weights for policy 0, policy_version 4100 (0.0032)
+ [2024-11-07 15:57:59,927][14395] Fps is (10 sec: 7372.9, 60 sec: 5120.0, 300 sec: 4966.4). Total num frames: 16809984. Throughput: 0: 1340.7. Samples: 199080. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:57:59,929][14395] Avg episode reward: [(0, '4.689')]
+ [2024-11-07 15:58:04,528][14461] Updated weights for policy 0, policy_version 4110 (0.0048)
+ [2024-11-07 15:58:04,927][14395] Fps is (10 sec: 5734.4, 60 sec: 5120.1, 300 sec: 4964.9). Total num frames: 16834560. Throughput: 0: 1332.6. Samples: 202700. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:58:04,929][14395] Avg episode reward: [(0, '4.492')]
+ [2024-11-07 15:58:09,927][14395] Fps is (10 sec: 6144.0, 60 sec: 5461.5, 300 sec: 5035.7). Total num frames: 16871424. Throughput: 0: 1410.6. Samples: 212400. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 15:58:09,929][14395] Avg episode reward: [(0, '4.335')]
+ [2024-11-07 15:58:10,164][14461] Updated weights for policy 0, policy_version 4120 (0.0027)
+ [2024-11-07 15:58:16,757][14395] Fps is (10 sec: 5193.6, 60 sec: 5299.7, 300 sec: 4980.2). Total num frames: 16896000. Throughput: 0: 1304.1. Samples: 217486. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 15:58:16,759][14395] Avg episode reward: [(0, '4.390')]
+ [2024-11-07 15:58:19,359][14461] Updated weights for policy 0, policy_version 4130 (0.0031)
+ [2024-11-07 15:58:19,927][14395] Fps is (10 sec: 4505.6, 60 sec: 5462.1, 300 sec: 5006.2). Total num frames: 16916480. Throughput: 0: 1369.6. Samples: 222262. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+ [2024-11-07 15:58:19,930][14395] Avg episode reward: [(0, '4.352')]
+ [2024-11-07 15:58:24,928][14395] Fps is (10 sec: 6517.1, 60 sec: 5599.1, 300 sec: 5048.0). Total num frames: 16949248. Throughput: 0: 1459.8. Samples: 232694. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 15:58:24,930][14395] Avg episode reward: [(0, '4.351')]
+ [2024-11-07 15:58:25,722][14461] Updated weights for policy 0, policy_version 4140 (0.0038)
+ [2024-11-07 15:58:29,927][14395] Fps is (10 sec: 6553.6, 60 sec: 5734.7, 300 sec: 5087.7). Total num frames: 16982016. Throughput: 0: 1550.2. Samples: 241538. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 15:58:29,929][14395] Avg episode reward: [(0, '4.556')]
+ [2024-11-07 15:58:31,857][14461] Updated weights for policy 0, policy_version 4150 (0.0032)
+ [2024-11-07 15:58:34,927][14395] Fps is (10 sec: 7373.2, 60 sec: 5939.2, 300 sec: 5167.3). Total num frames: 17022976. Throughput: 0: 1578.4. Samples: 247402. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:58:34,929][14395] Avg episode reward: [(0, '4.514')]
+ [2024-11-07 15:58:37,073][14461] Updated weights for policy 0, policy_version 4160 (0.0021)
+ [2024-11-07 15:58:39,927][14395] Fps is (10 sec: 7782.4, 60 sec: 6212.3, 300 sec: 5222.4). Total num frames: 17059840. Throughput: 0: 1581.1. Samples: 259320. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:58:39,928][14395] Avg episode reward: [(0, '4.365')]
+ [2024-11-07 15:58:42,649][14461] Updated weights for policy 0, policy_version 4170 (0.0027)
+ [2024-11-07 15:58:44,927][14395] Fps is (10 sec: 6963.1, 60 sec: 6553.6, 300 sec: 5254.9). Total num frames: 17092608. Throughput: 0: 1568.2. Samples: 269650. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:58:44,929][14395] Avg episode reward: [(0, '4.379')]
+ [2024-11-07 15:58:48,952][14461] Updated weights for policy 0, policy_version 4180 (0.0025)
+ [2024-11-07 15:58:51,218][14395] Fps is (10 sec: 5441.7, 60 sec: 6282.0, 300 sec: 5234.1). Total num frames: 17121280. Throughput: 0: 1553.4. Samples: 274606. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 15:58:51,220][14395] Avg episode reward: [(0, '4.417')]
+ [2024-11-07 15:58:54,927][14395] Fps is (10 sec: 5324.6, 60 sec: 6144.0, 300 sec: 5258.1). Total num frames: 17145856. Throughput: 0: 1511.7. Samples: 280426. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:58:54,930][14395] Avg episode reward: [(0, '4.625')]
+ [2024-11-07 15:58:57,073][14461] Updated weights for policy 0, policy_version 4190 (0.0026)
+ [2024-11-07 15:58:59,927][14395] Fps is (10 sec: 7054.4, 60 sec: 6212.3, 300 sec: 5306.2). Total num frames: 17182720. Throughput: 0: 1721.4. Samples: 291798. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:58:59,929][14395] Avg episode reward: [(0, '4.445')]
+ [2024-11-07 15:58:59,947][14445] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004195_17182720.pth...
+ [2024-11-07 15:59:00,087][14445] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000003910_16015360.pth
+ [2024-11-07 15:59:02,802][14461] Updated weights for policy 0, policy_version 4200 (0.0023)
+ [2024-11-07 15:59:04,928][14395] Fps is (10 sec: 7372.4, 60 sec: 6417.0, 300 sec: 5352.1). Total num frames: 17219584. Throughput: 0: 1662.1. Samples: 297056. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:59:04,931][14395] Avg episode reward: [(0, '4.652')]
+ [2024-11-07 15:59:08,029][14461] Updated weights for policy 0, policy_version 4210 (0.0024)
+ [2024-11-07 15:59:09,930][14395] Fps is (10 sec: 7370.7, 60 sec: 6416.8, 300 sec: 5396.0). Total num frames: 17256448. Throughput: 0: 1687.7. Samples: 308644. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 15:59:09,933][14395] Avg episode reward: [(0, '4.373')]
+ [2024-11-07 15:59:14,459][14461] Updated weights for policy 0, policy_version 4220 (0.0045)
+ [2024-11-07 15:59:14,927][14395] Fps is (10 sec: 6554.2, 60 sec: 6689.4, 300 sec: 5403.2). Total num frames: 17285120. Throughput: 0: 1698.0. Samples: 317946. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 15:59:14,932][14395] Avg episode reward: [(0, '4.586')]
+ [2024-11-07 15:59:19,928][14395] Fps is (10 sec: 6145.4, 60 sec: 6690.1, 300 sec: 5427.2). Total num frames: 17317888. Throughput: 0: 1677.6. Samples: 322894. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 15:59:19,931][14395] Avg episode reward: [(0, '4.264')]
+ [2024-11-07 15:59:20,972][14461] Updated weights for policy 0, policy_version 4230 (0.0044)
+ [2024-11-07 15:59:25,496][14395] Fps is (10 sec: 4650.4, 60 sec: 6356.8, 300 sec: 5370.8). Total num frames: 17334272. Throughput: 0: 1578.1. Samples: 331234. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:59:25,502][14395] Avg episode reward: [(0, '4.232')]
+ [2024-11-07 15:59:29,927][14395] Fps is (10 sec: 3686.5, 60 sec: 6212.2, 300 sec: 5357.6). Total num frames: 17354752. Throughput: 0: 1457.4. Samples: 335232. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:59:29,929][14395] Avg episode reward: [(0, '4.326')]
+ [2024-11-07 15:59:31,611][14461] Updated weights for policy 0, policy_version 4240 (0.0051)
+ [2024-11-07 15:59:34,927][14395] Fps is (10 sec: 5646.1, 60 sec: 6075.7, 300 sec: 5381.0). Total num frames: 17387520. Throughput: 0: 1491.8. Samples: 339814. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 15:59:34,929][14395] Avg episode reward: [(0, '4.443')]
+ [2024-11-07 15:59:37,263][14461] Updated weights for policy 0, policy_version 4250 (0.0036)
+ [2024-11-07 15:59:39,927][14395] Fps is (10 sec: 7373.0, 60 sec: 6144.0, 300 sec: 5435.1). Total num frames: 17428480. Throughput: 0: 1568.5. Samples: 351010. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 15:59:39,932][14395] Avg episode reward: [(0, '4.296')]
+ [2024-11-07 15:59:42,885][14461] Updated weights for policy 0, policy_version 4260 (0.0025)
+ [2024-11-07 15:59:44,927][14395] Fps is (10 sec: 7373.0, 60 sec: 6144.0, 300 sec: 5456.2). Total num frames: 17461248. Throughput: 0: 1548.5. Samples: 361482. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:59:44,930][14395] Avg episode reward: [(0, '4.273')]
+ [2024-11-07 15:59:48,105][14461] Updated weights for policy 0, policy_version 4270 (0.0021)
+ [2024-11-07 15:59:49,927][14395] Fps is (10 sec: 7372.7, 60 sec: 6488.3, 300 sec: 5506.8). Total num frames: 17502208. Throughput: 0: 1569.5. Samples: 367682. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:59:49,929][14395] Avg episode reward: [(0, '4.594')]
+ [2024-11-07 15:59:53,467][14461] Updated weights for policy 0, policy_version 4280 (0.0023)
+ [2024-11-07 15:59:54,928][14395] Fps is (10 sec: 7782.0, 60 sec: 6553.6, 300 sec: 5540.8). Total num frames: 17539072. Throughput: 0: 1574.4. Samples: 379488. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 15:59:54,932][14395] Avg episode reward: [(0, '4.520')]
+ [2024-11-07 16:00:00,137][14395] Fps is (10 sec: 5616.5, 60 sec: 6258.6, 300 sec: 5510.8). Total num frames: 17559552. Throughput: 0: 1484.8. Samples: 385072. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 16:00:00,140][14395] Avg episode reward: [(0, '4.470')]
+ [2024-11-07 16:00:01,768][14461] Updated weights for policy 0, policy_version 4290 (0.0040)
+ [2024-11-07 16:00:04,927][14395] Fps is (10 sec: 4505.7, 60 sec: 6075.8, 300 sec: 5504.5). Total num frames: 17584128. Throughput: 0: 1506.7. Samples: 390696. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 16:00:04,930][14395] Avg episode reward: [(0, '4.333')]
+ [2024-11-07 16:00:08,597][14461] Updated weights for policy 0, policy_version 4300 (0.0031)
+ [2024-11-07 16:00:09,927][14395] Fps is (10 sec: 6275.8, 60 sec: 6076.0, 300 sec: 5536.7). Total num frames: 17620992. Throughput: 0: 1537.9. Samples: 399564. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:00:09,929][14395] Avg episode reward: [(0, '4.285')]
+ [2024-11-07 16:00:14,927][14395] Fps is (10 sec: 6553.6, 60 sec: 6075.7, 300 sec: 5540.0). Total num frames: 17649664. Throughput: 0: 1631.6. Samples: 408654. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 16:00:14,929][14395] Avg episode reward: [(0, '4.383')]
+ [2024-11-07 16:00:15,813][14461] Updated weights for policy 0, policy_version 4310 (0.0032)
+ [2024-11-07 16:00:19,927][14395] Fps is (10 sec: 5324.8, 60 sec: 5939.3, 300 sec: 5641.1). Total num frames: 17674240. Throughput: 0: 1607.7. Samples: 412162. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 16:00:19,929][14395] Avg episode reward: [(0, '4.445')]
+ [2024-11-07 16:00:22,655][14461] Updated weights for policy 0, policy_version 4320 (0.0036)
+ [2024-11-07 16:00:24,927][14395] Fps is (10 sec: 5734.5, 60 sec: 6271.8, 300 sec: 5734.4). Total num frames: 17707008. Throughput: 0: 1570.3. Samples: 421674. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 16:00:24,932][14395] Avg episode reward: [(0, '4.288')]
+ [2024-11-07 16:00:28,572][14461] Updated weights for policy 0, policy_version 4330 (0.0029)
+ [2024-11-07 16:00:29,927][14395] Fps is (10 sec: 6963.0, 60 sec: 6485.3, 300 sec: 5762.2). Total num frames: 17743872. Throughput: 0: 1572.7. Samples: 432254. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:00:29,929][14395] Avg episode reward: [(0, '4.498')]
+ [2024-11-07 16:00:34,927][14395] Fps is (10 sec: 5734.4, 60 sec: 6280.6, 300 sec: 5734.4). Total num frames: 17764352. Throughput: 0: 1546.8. Samples: 437286. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:00:34,929][14395] Avg episode reward: [(0, '4.555')]
+ [2024-11-07 16:00:37,269][14461] Updated weights for policy 0, policy_version 4340 (0.0042)
+ [2024-11-07 16:00:39,927][14395] Fps is (10 sec: 4096.1, 60 sec: 5939.2, 300 sec: 5734.4). Total num frames: 17784832. Throughput: 0: 1386.6. Samples: 441886. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:00:39,929][14395] Avg episode reward: [(0, '4.577')]
+ [2024-11-07 16:00:44,927][14395] Fps is (10 sec: 4505.5, 60 sec: 5802.6, 300 sec: 5720.5). Total num frames: 17809408. Throughput: 0: 1422.1. Samples: 448766. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:00:44,929][14395] Avg episode reward: [(0, '4.496')]
+ [2024-11-07 16:00:46,121][14461] Updated weights for policy 0, policy_version 4350 (0.0056)
+ [2024-11-07 16:00:49,927][14395] Fps is (10 sec: 5734.4, 60 sec: 5666.1, 300 sec: 5706.6). Total num frames: 17842176. Throughput: 0: 1381.6. Samples: 452868. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:00:49,929][14395] Avg episode reward: [(0, '4.489')]
+ [2024-11-07 16:00:52,000][14461] Updated weights for policy 0, policy_version 4360 (0.0030)
+ [2024-11-07 16:00:54,927][14395] Fps is (10 sec: 6963.4, 60 sec: 5666.2, 300 sec: 5762.2). Total num frames: 17879040. Throughput: 0: 1432.7. Samples: 464034. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:00:54,929][14395] Avg episode reward: [(0, '4.361')]
+ [2024-11-07 16:00:58,239][14461] Updated weights for policy 0, policy_version 4370 (0.0043)
+ [2024-11-07 16:00:59,931][14395] Fps is (10 sec: 6550.9, 60 sec: 5822.6, 300 sec: 5762.1). Total num frames: 17907712. Throughput: 0: 1439.8. Samples: 473450. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:00:59,937][14395] Avg episode reward: [(0, '4.600')]
+ [2024-11-07 16:00:59,955][14445] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004372_17907712.pth...
+ [2024-11-07 16:01:00,145][14445] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004029_16502784.pth
9173
+ [2024-11-07 16:01:04,927][14395] Fps is (10 sec: 5324.7, 60 sec: 5802.7, 300 sec: 5706.6). Total num frames: 17932288. Throughput: 0: 1424.9. Samples: 476284. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
9174
+ [2024-11-07 16:01:04,930][14395] Avg episode reward: [(0, '4.918')]
9175
+ [2024-11-07 16:01:04,934][14445] Saving new best policy, reward=4.918!
9176
+ [2024-11-07 16:01:06,342][14461] Updated weights for policy 0, policy_version 4380 (0.0053)
9177
+ [2024-11-07 16:01:09,927][14395] Fps is (10 sec: 3688.0, 60 sec: 5393.1, 300 sec: 5665.1). Total num frames: 17944576. Throughput: 0: 1347.9. Samples: 482328. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
9178
+ [2024-11-07 16:01:09,929][14395] Avg episode reward: [(0, '4.684')]
9179
+ [2024-11-07 16:01:14,927][14395] Fps is (10 sec: 3686.4, 60 sec: 5324.8, 300 sec: 5692.8). Total num frames: 17969152. Throughput: 0: 1255.9. Samples: 488770. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
9180
+ [2024-11-07 16:01:14,928][14395] Avg episode reward: [(0, '4.702')]
9181
+ [2024-11-07 16:01:16,923][14461] Updated weights for policy 0, policy_version 4390 (0.0039)
9182
+ [2024-11-07 16:01:19,927][14395] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5678.9). Total num frames: 17993728. Throughput: 0: 1229.5. Samples: 492612. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
9183
+ [2024-11-07 16:01:19,929][14395] Avg episode reward: [(0, '4.364')]
9184
+ [2024-11-07 16:01:24,927][14395] Fps is (10 sec: 4915.3, 60 sec: 5188.3, 300 sec: 5637.2). Total num frames: 18018304. Throughput: 0: 1287.0. Samples: 499802. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
9185
+ [2024-11-07 16:01:24,929][14395] Avg episode reward: [(0, '4.314')]
9186
+ [2024-11-07 16:01:25,040][14461] Updated weights for policy 0, policy_version 4400 (0.0054)
9187
+ [2024-11-07 16:01:29,928][14395] Fps is (10 sec: 5734.2, 60 sec: 5120.0, 300 sec: 5678.9). Total num frames: 18051072. Throughput: 0: 1352.4. Samples: 509622. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
9188
+ [2024-11-07 16:01:29,931][14395] Avg episode reward: [(0, '4.397')]
9189
+ [2024-11-07 16:01:31,325][14461] Updated weights for policy 0, policy_version 4410 (0.0026)
9190
+ [2024-11-07 16:01:34,927][14395] Fps is (10 sec: 6553.6, 60 sec: 5324.8, 300 sec: 5651.1). Total num frames: 18083840. Throughput: 0: 1370.8. Samples: 514552. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
9191
+ [2024-11-07 16:01:34,931][14395] Avg episode reward: [(0, '4.457')]
9192
+ [2024-11-07 16:01:37,833][14461] Updated weights for policy 0, policy_version 4420 (0.0036)
9193
+ [2024-11-07 16:01:39,927][14395] Fps is (10 sec: 6553.7, 60 sec: 5529.6, 300 sec: 5678.9). Total num frames: 18116608. Throughput: 0: 1324.4. Samples: 523632. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
9194
+ [2024-11-07 16:01:39,929][14395] Avg episode reward: [(0, '4.292')]
9195
+ [2024-11-07 16:01:44,927][14395] Fps is (10 sec: 4915.2, 60 sec: 5393.1, 300 sec: 5665.0). Total num frames: 18132992. Throughput: 0: 1249.5. Samples: 529674. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
9196
+ [2024-11-07 16:01:44,930][14395] Avg episode reward: [(0, '4.513')]
9197
+ [2024-11-07 16:01:46,301][14461] Updated weights for policy 0, policy_version 4430 (0.0022)
9198
+ [2024-11-07 16:01:49,927][14395] Fps is (10 sec: 4915.1, 60 sec: 5393.0, 300 sec: 5706.6). Total num frames: 18165760. Throughput: 0: 1301.3. Samples: 534842. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
9199
+ [2024-11-07 16:01:49,929][14395] Avg episode reward: [(0, '4.579')]
9200
+ [2024-11-07 16:01:53,333][14461] Updated weights for policy 0, policy_version 4440 (0.0042)
9201
+ [2024-11-07 16:01:54,928][14395] Fps is (10 sec: 5733.9, 60 sec: 5188.2, 300 sec: 5720.5). Total num frames: 18190336. Throughput: 0: 1357.5. Samples: 543416. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
9202
+ [2024-11-07 16:01:54,930][14395] Avg episode reward: [(0, '4.285')]
9203
+ [2024-11-07 16:01:59,270][14461] Updated weights for policy 0, policy_version 4450 (0.0025)
9204
+ [2024-11-07 16:01:59,927][14395] Fps is (10 sec: 6144.2, 60 sec: 5325.2, 300 sec: 5762.2). Total num frames: 18227200. Throughput: 0: 1444.0. Samples: 553752. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
9205
+ [2024-11-07 16:01:59,928][14395] Avg episode reward: [(0, '4.199')]
9206
+ [2024-11-07 16:02:04,928][14395] Fps is (10 sec: 7373.1, 60 sec: 5529.6, 300 sec: 5831.6). Total num frames: 18264064. Throughput: 0: 1459.9. Samples: 558310. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
9207
+ [2024-11-07 16:02:04,929][14395] Avg episode reward: [(0, '4.323')]
9208
+ [2024-11-07 16:02:05,171][14461] Updated weights for policy 0, policy_version 4460 (0.0035)
9209
+ [2024-11-07 16:02:09,927][14395] Fps is (10 sec: 6963.0, 60 sec: 5870.9, 300 sec: 5859.4). Total num frames: 18296832. Throughput: 0: 1556.9. Samples: 569862. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
9210
+ [2024-11-07 16:02:09,930][14395] Avg episode reward: [(0, '4.337')]
9211
+ [2024-11-07 16:02:11,877][14461] Updated weights for policy 0, policy_version 4470 (0.0039)
9212
+ [2024-11-07 16:02:14,928][14395] Fps is (10 sec: 6553.6, 60 sec: 6007.4, 300 sec: 5901.2). Total num frames: 18329600. Throughput: 0: 1525.9. Samples: 578288. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
9213
+ [2024-11-07 16:02:14,929][14395] Avg episode reward: [(0, '4.454')]
9214
+ [2024-11-07 16:02:19,928][14395] Fps is (10 sec: 4914.7, 60 sec: 5870.8, 300 sec: 5873.5). Total num frames: 18345984. Throughput: 0: 1478.4. Samples: 581084. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
9215
+ [2024-11-07 16:02:19,930][14395] Avg episode reward: [(0, '4.500')]
9216
+ [2024-11-07 16:02:20,550][14461] Updated weights for policy 0, policy_version 4480 (0.0038)
9217
+ [2024-11-07 16:02:24,927][14395] Fps is (10 sec: 4915.3, 60 sec: 6007.4, 300 sec: 5901.1). Total num frames: 18378752. Throughput: 0: 1451.3. Samples: 588942. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
9218
+ [2024-11-07 16:02:24,929][14395] Avg episode reward: [(0, '4.422')]
9219
+ [2024-11-07 16:02:26,971][14461] Updated weights for policy 0, policy_version 4490 (0.0040)
9220
+ [2024-11-07 16:02:29,928][14395] Fps is (10 sec: 6144.5, 60 sec: 5939.2, 300 sec: 5901.0). Total num frames: 18407424. Throughput: 0: 1530.0. Samples: 598524. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
9221
+ [2024-11-07 16:02:29,932][14395] Avg episode reward: [(0, '4.494')]
9222
+ [2024-11-07 16:02:34,478][14461] Updated weights for policy 0, policy_version 4500 (0.0036)
9223
+ [2024-11-07 16:02:34,927][14395] Fps is (10 sec: 5324.8, 60 sec: 5802.7, 300 sec: 5914.9). Total num frames: 18432000. Throughput: 0: 1493.3. Samples: 602038. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
9224
+ [2024-11-07 16:02:34,930][14395] Avg episode reward: [(0, '4.464')]
9225
+ [2024-11-07 16:02:39,928][14395] Fps is (10 sec: 4915.2, 60 sec: 5666.1, 300 sec: 5956.5). Total num frames: 18456576. Throughput: 0: 1452.6. Samples: 608784. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
9226
+ [2024-11-07 16:02:39,930][14395] Avg episode reward: [(0, '4.351')]
9227
+ [2024-11-07 16:02:42,419][14461] Updated weights for policy 0, policy_version 4510 (0.0040)
9228
+ [2024-11-07 16:02:44,927][14395] Fps is (10 sec: 5324.8, 60 sec: 5870.9, 300 sec: 5928.8). Total num frames: 18485248. Throughput: 0: 1429.1. Samples: 618062. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
9229
+ [2024-11-07 16:02:44,929][14395] Avg episode reward: [(0, '4.379')]
9230
+ [2024-11-07 16:02:49,091][14461] Updated weights for policy 0, policy_version 4520 (0.0051)
9231
+ [2024-11-07 16:02:49,927][14395] Fps is (10 sec: 6144.2, 60 sec: 5870.9, 300 sec: 5901.0). Total num frames: 18518016. Throughput: 0: 1435.6. Samples: 622912. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
9232
+ [2024-11-07 16:02:49,929][14395] Avg episode reward: [(0, '4.579')]
9233
+ [2024-11-07 16:02:54,927][14395] Fps is (10 sec: 4915.2, 60 sec: 5734.5, 300 sec: 5845.5). Total num frames: 18534400. Throughput: 0: 1298.6. Samples: 628298. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
9234
+ [2024-11-07 16:02:54,929][14395] Avg episode reward: [(0, '4.503')]
9235
+ [2024-11-07 16:02:58,299][14461] Updated weights for policy 0, policy_version 4530 (0.0038)
9236
+ [2024-11-07 16:02:59,932][14395] Fps is (10 sec: 4503.7, 60 sec: 5597.5, 300 sec: 5859.3). Total num frames: 18563072. Throughput: 0: 1309.3. Samples: 637212. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
9237
+ [2024-11-07 16:02:59,939][14395] Avg episode reward: [(0, '4.345')]
9238
+ [2024-11-07 16:02:59,951][14445] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004532_18563072.pth...
9239
+ [2024-11-07 16:03:00,280][14445] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004195_17182720.pth
9240
+ [2024-11-07 16:03:04,571][14461] Updated weights for policy 0, policy_version 4540 (0.0023)
9241
+ [2024-11-07 16:03:04,927][14395] Fps is (10 sec: 6144.0, 60 sec: 5529.6, 300 sec: 5845.5). Total num frames: 18595840. Throughput: 0: 1357.9. Samples: 642186. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
9242
+ [2024-11-07 16:03:04,940][14395] Avg episode reward: [(0, '4.329')]
9243
+ [2024-11-07 16:03:09,928][14395] Fps is (10 sec: 6146.5, 60 sec: 5461.3, 300 sec: 5895.9). Total num frames: 18624512. Throughput: 0: 1394.9. Samples: 651714. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
9244
+ [2024-11-07 16:03:09,930][14395] Avg episode reward: [(0, '4.494')]
9245
+ [2024-11-07 16:03:11,780][14461] Updated weights for policy 0, policy_version 4550 (0.0036)
9246
+ [2024-11-07 16:03:14,927][14395] Fps is (10 sec: 6143.9, 60 sec: 5461.3, 300 sec: 5901.0). Total num frames: 18657280. Throughput: 0: 1379.3. Samples: 660592. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
9247
+ [2024-11-07 16:03:14,930][14395] Avg episode reward: [(0, '4.480')]
9248
+ [2024-11-07 16:03:17,864][14461] Updated weights for policy 0, policy_version 4560 (0.0031)
9249
+ [2024-11-07 16:03:19,927][14395] Fps is (10 sec: 6144.1, 60 sec: 5666.2, 300 sec: 5887.1). Total num frames: 18685952. Throughput: 0: 1405.0. Samples: 665262. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
9250
+ [2024-11-07 16:03:19,930][14395] Avg episode reward: [(0, '4.341')]
9251
+ [2024-11-07 16:03:27,180][14395] Fps is (10 sec: 4680.1, 60 sec: 5395.3, 300 sec: 5828.7). Total num frames: 18714624. Throughput: 0: 1375.1. Samples: 673762. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
9252
+ [2024-11-07 16:03:27,196][14395] Avg episode reward: [(0, '4.461')]
9253
+ [2024-11-07 16:03:27,887][14461] Updated weights for policy 0, policy_version 4570 (0.0042)
9254
+ [2024-11-07 16:03:29,928][14395] Fps is (10 sec: 3686.0, 60 sec: 5256.5, 300 sec: 5762.1). Total num frames: 18722816. Throughput: 0: 1323.3. Samples: 677612. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
9255
+ [2024-11-07 16:03:29,941][14395] Avg episode reward: [(0, '4.371')]
9256
+ [2024-11-07 16:03:34,927][14395] Fps is (10 sec: 4229.7, 60 sec: 5256.5, 300 sec: 5720.5). Total num frames: 18747392. Throughput: 0: 1290.2. Samples: 680972. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
9257
+ [2024-11-07 16:03:34,929][14395] Avg episode reward: [(0, '4.341')]
9258
+ [2024-11-07 16:03:36,869][14461] Updated weights for policy 0, policy_version 4580 (0.0057)
9259
+ [2024-11-07 16:03:39,928][14395] Fps is (10 sec: 4915.4, 60 sec: 5256.5, 300 sec: 5692.7). Total num frames: 18771968. Throughput: 0: 1337.3. Samples: 688476. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
9260
+ [2024-11-07 16:03:39,930][14395] Avg episode reward: [(0, '4.376')]
9261
+ [2024-11-07 16:03:44,774][14461] Updated weights for policy 0, policy_version 4590 (0.0065)
9262
+ [2024-11-07 16:03:44,928][14395] Fps is (10 sec: 5324.7, 60 sec: 5256.5, 300 sec: 5717.8). Total num frames: 18800640. Throughput: 0: 1316.3. Samples: 696440. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
9263
+ [2024-11-07 16:03:44,930][14395] Avg episode reward: [(0, '4.450')]
9264
+ [2024-11-07 16:03:49,927][14395] Fps is (10 sec: 5325.2, 60 sec: 5120.0, 300 sec: 5692.8). Total num frames: 18825216. Throughput: 0: 1286.0. Samples: 700054. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
9265
+ [2024-11-07 16:03:49,929][14395] Avg episode reward: [(0, '4.593')]
+ [2024-11-07 16:03:52,136][14461] Updated weights for policy 0, policy_version 4600 (0.0042)
+ [2024-11-07 16:03:54,927][14395] Fps is (10 sec: 5325.0, 60 sec: 5324.8, 300 sec: 5665.0). Total num frames: 18853888. Throughput: 0: 1271.2. Samples: 708916. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 16:03:54,930][14395] Avg episode reward: [(0, '4.477')]
+ [2024-11-07 16:03:59,332][14461] Updated weights for policy 0, policy_version 4610 (0.0046)
+ [2024-11-07 16:04:01,643][14395] Fps is (10 sec: 4894.5, 60 sec: 5177.1, 300 sec: 5604.6). Total num frames: 18882560. Throughput: 0: 1220.3. Samples: 717600. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:04:01,645][14395] Avg episode reward: [(0, '4.510')]
+ [2024-11-07 16:04:04,927][14395] Fps is (10 sec: 4505.5, 60 sec: 5051.7, 300 sec: 5567.8). Total num frames: 18898944. Throughput: 0: 1185.0. Samples: 718586. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:04:04,935][14395] Avg episode reward: [(0, '4.586')]
+ [2024-11-07 16:04:09,356][14461] Updated weights for policy 0, policy_version 4620 (0.0029)
+ [2024-11-07 16:04:09,930][14395] Fps is (10 sec: 4942.8, 60 sec: 4983.3, 300 sec: 5553.8). Total num frames: 18923520. Throughput: 0: 1227.2. Samples: 726224. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:04:09,932][14395] Avg episode reward: [(0, '4.688')]
+ [2024-11-07 16:04:14,927][14395] Fps is (10 sec: 5734.6, 60 sec: 4983.5, 300 sec: 5553.9). Total num frames: 18956288. Throughput: 0: 1282.8. Samples: 735336. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 16:04:14,928][14395] Avg episode reward: [(0, '4.624')]
+ [2024-11-07 16:04:16,116][14461] Updated weights for policy 0, policy_version 4630 (0.0026)
+ [2024-11-07 16:04:19,927][14395] Fps is (10 sec: 6145.6, 60 sec: 4983.5, 300 sec: 5606.4). Total num frames: 18984960. Throughput: 0: 1312.2. Samples: 740020. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 16:04:19,929][14395] Avg episode reward: [(0, '4.610')]
+ [2024-11-07 16:04:22,497][14461] Updated weights for policy 0, policy_version 4640 (0.0031)
+ [2024-11-07 16:04:24,927][14395] Fps is (10 sec: 6143.9, 60 sec: 5248.8, 300 sec: 5637.2). Total num frames: 19017728. Throughput: 0: 1357.9. Samples: 749580. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 16:04:24,931][14395] Avg episode reward: [(0, '4.613')]
+ [2024-11-07 16:04:29,009][14461] Updated weights for policy 0, policy_version 4650 (0.0034)
+ [2024-11-07 16:04:29,932][14395] Fps is (10 sec: 6550.6, 60 sec: 5461.0, 300 sec: 5637.1). Total num frames: 19050496. Throughput: 0: 1386.9. Samples: 758856. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:04:29,934][14395] Avg episode reward: [(0, '4.465')]
+ [2024-11-07 16:04:36,334][14395] Fps is (10 sec: 5027.1, 60 sec: 5336.2, 300 sec: 5555.2). Total num frames: 19075072. Throughput: 0: 1359.5. Samples: 763146. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 16:04:36,336][14395] Avg episode reward: [(0, '4.551')]
+ [2024-11-07 16:04:38,320][14461] Updated weights for policy 0, policy_version 4660 (0.0038)
+ [2024-11-07 16:04:39,927][14395] Fps is (10 sec: 4507.7, 60 sec: 5393.1, 300 sec: 5540.0). Total num frames: 19095552. Throughput: 0: 1327.1. Samples: 768636. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 16:04:39,930][14395] Avg episode reward: [(0, '4.531')]
+ [2024-11-07 16:04:44,927][14395] Fps is (10 sec: 5719.9, 60 sec: 5393.1, 300 sec: 5498.4). Total num frames: 19124224. Throughput: 0: 1389.3. Samples: 777736. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2024-11-07 16:04:44,930][14395] Avg episode reward: [(0, '4.333')]
+ [2024-11-07 16:04:45,079][14461] Updated weights for policy 0, policy_version 4670 (0.0038)
+ [2024-11-07 16:04:49,927][14395] Fps is (10 sec: 6144.0, 60 sec: 5529.6, 300 sec: 5484.5). Total num frames: 19156992. Throughput: 0: 1415.9. Samples: 782302. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:04:49,929][14395] Avg episode reward: [(0, '4.411')]
+ [2024-11-07 16:04:52,097][14461] Updated weights for policy 0, policy_version 4680 (0.0032)
+ [2024-11-07 16:04:54,928][14395] Fps is (10 sec: 5734.2, 60 sec: 5461.3, 300 sec: 5502.3). Total num frames: 19181568. Throughput: 0: 1438.7. Samples: 790964. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:04:54,929][14395] Avg episode reward: [(0, '4.371')]
+ [2024-11-07 16:04:58,806][14461] Updated weights for policy 0, policy_version 4690 (0.0029)
+ [2024-11-07 16:04:59,929][14395] Fps is (10 sec: 5733.3, 60 sec: 5692.2, 300 sec: 5526.1). Total num frames: 19214336. Throughput: 0: 1440.9. Samples: 800180. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:04:59,932][14395] Avg episode reward: [(0, '4.705')]
+ [2024-11-07 16:04:59,948][14445] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004691_19214336.pth...
+ [2024-11-07 16:05:00,105][14445] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004372_17907712.pth
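(The Saving/Removing pair above is the learner's checkpoint rotation: each new save evicts the oldest checkpoint so only a fixed number stay on disk. A minimal Python sketch of that policy; this is a hypothetical helper, not Sample Factory's actual code, and it assumes the zero-padded counters in the filenames make lexicographic order equal training order:

from pathlib import Path

def rotate_checkpoints(ckpt_dir: str, keep: int = 2) -> None:
    # Zero-padded version/step counters mean sorted() returns training order.
    ckpts = sorted(Path(ckpt_dir).glob("checkpoint_*.pth"))
    # Drop the oldest files until only `keep` remain.
    for old in ckpts[:-keep]:
        old.unlink()
)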
+ [2024-11-07 16:05:04,927][14395] Fps is (10 sec: 6144.2, 60 sec: 5734.4, 300 sec: 5498.4). Total num frames: 19243008. Throughput: 0: 1419.7. Samples: 803908. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 16:05:04,930][14395] Avg episode reward: [(0, '4.499')]
+ [2024-11-07 16:05:06,089][14461] Updated weights for policy 0, policy_version 4700 (0.0041)
+ [2024-11-07 16:05:10,766][14395] Fps is (10 sec: 4535.6, 60 sec: 5588.2, 300 sec: 5455.1). Total num frames: 19263488. Throughput: 0: 1385.6. Samples: 813096. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 16:05:10,772][14395] Avg episode reward: [(0, '4.577')]
+ [2024-11-07 16:05:14,927][14395] Fps is (10 sec: 4505.6, 60 sec: 5529.6, 300 sec: 5470.6). Total num frames: 19288064. Throughput: 0: 1329.8. Samples: 818690. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:05:14,930][14395] Avg episode reward: [(0, '4.508')]
+ [2024-11-07 16:05:15,214][14461] Updated weights for policy 0, policy_version 4710 (0.0036)
+ [2024-11-07 16:05:19,927][14395] Fps is (10 sec: 5365.2, 60 sec: 5461.3, 300 sec: 5442.8). Total num frames: 19312640. Throughput: 0: 1343.5. Samples: 821712. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+ [2024-11-07 16:05:19,937][14395] Avg episode reward: [(0, '4.623')]
+ [2024-11-07 16:05:23,759][14461] Updated weights for policy 0, policy_version 4720 (0.0068)
+ [2024-11-07 16:05:24,927][14395] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5401.2). Total num frames: 19337216. Throughput: 0: 1355.1. Samples: 829614. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:05:24,929][14395] Avg episode reward: [(0, '4.623')]
+ [2024-11-07 16:05:29,927][14395] Fps is (10 sec: 4915.1, 60 sec: 5188.6, 300 sec: 5415.0). Total num frames: 19361792. Throughput: 0: 1304.9. Samples: 836456. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:05:29,931][14395] Avg episode reward: [(0, '4.453')]
+ [2024-11-07 16:05:31,546][14461] Updated weights for policy 0, policy_version 4730 (0.0066)
+ [2024-11-07 16:05:34,927][14395] Fps is (10 sec: 5734.4, 60 sec: 5452.7, 300 sec: 5456.7). Total num frames: 19394560. Throughput: 0: 1324.0. Samples: 841884. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:05:34,930][14395] Avg episode reward: [(0, '4.237')]
+ [2024-11-07 16:05:37,528][14461] Updated weights for policy 0, policy_version 4740 (0.0035)
+ [2024-11-07 16:05:39,927][14395] Fps is (10 sec: 6963.4, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 19431424. Throughput: 0: 1362.9. Samples: 852294. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 16:05:39,929][14395] Avg episode reward: [(0, '4.496')]
+ [2024-11-07 16:05:42,481][14461] Updated weights for policy 0, policy_version 4750 (0.0028)
+ [2024-11-07 16:05:45,234][14395] Fps is (10 sec: 5961.3, 60 sec: 5501.5, 300 sec: 5464.9). Total num frames: 19456000. Throughput: 0: 1287.3. Samples: 858502. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:05:45,236][14395] Avg episode reward: [(0, '4.379')]
+ [2024-11-07 16:05:49,850][14461] Updated weights for policy 0, policy_version 4760 (0.0026)
+ [2024-11-07 16:05:49,927][14395] Fps is (10 sec: 6553.6, 60 sec: 5666.1, 300 sec: 5484.5). Total num frames: 19496960. Throughput: 0: 1380.0. Samples: 866006. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 16:05:49,929][14395] Avg episode reward: [(0, '4.544')]
+ [2024-11-07 16:05:54,927][14395] Fps is (10 sec: 8028.4, 60 sec: 5871.0, 300 sec: 5512.3). Total num frames: 19533824. Throughput: 0: 1476.0. Samples: 878278. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 16:05:54,929][14395] Avg episode reward: [(0, '4.481')]
+ [2024-11-07 16:05:54,982][14461] Updated weights for policy 0, policy_version 4770 (0.0025)
+ [2024-11-07 16:05:59,923][14461] Updated weights for policy 0, policy_version 4780 (0.0029)
+ [2024-11-07 16:05:59,927][14395] Fps is (10 sec: 8192.0, 60 sec: 6075.9, 300 sec: 5581.7). Total num frames: 19578880. Throughput: 0: 1594.6. Samples: 890448. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 16:05:59,929][14395] Avg episode reward: [(0, '4.536')]
+ [2024-11-07 16:06:04,927][14395] Fps is (10 sec: 7782.5, 60 sec: 6144.0, 300 sec: 5651.1). Total num frames: 19611648. Throughput: 0: 1644.9. Samples: 895734. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 16:06:04,928][14395] Avg episode reward: [(0, '4.282')]
+ [2024-11-07 16:06:05,438][14461] Updated weights for policy 0, policy_version 4790 (0.0027)
+ [2024-11-07 16:06:09,927][14395] Fps is (10 sec: 7372.8, 60 sec: 6577.3, 300 sec: 5706.6). Total num frames: 19652608. Throughput: 0: 1738.2. Samples: 907832. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 16:06:09,930][14395] Avg episode reward: [(0, '4.543')]
+ [2024-11-07 16:06:10,827][14461] Updated weights for policy 0, policy_version 4800 (0.0028)
+ [2024-11-07 16:06:14,927][14395] Fps is (10 sec: 7782.3, 60 sec: 6690.1, 300 sec: 5748.3). Total num frames: 19689472. Throughput: 0: 1832.5. Samples: 918920. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:06:14,932][14395] Avg episode reward: [(0, '4.297')]
+ [2024-11-07 16:06:16,033][14461] Updated weights for policy 0, policy_version 4810 (0.0029)
+ [2024-11-07 16:06:19,927][14395] Fps is (10 sec: 6143.9, 60 sec: 6690.1, 300 sec: 5748.3). Total num frames: 19714048. Throughput: 0: 1850.5. Samples: 925158. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+ [2024-11-07 16:06:19,931][14395] Avg episode reward: [(0, '4.377')]
+ [2024-11-07 16:06:23,171][14461] Updated weights for policy 0, policy_version 4820 (0.0031)
+ [2024-11-07 16:06:24,927][14395] Fps is (10 sec: 6553.6, 60 sec: 6963.2, 300 sec: 5776.1). Total num frames: 19755008. Throughput: 0: 1792.7. Samples: 932966. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+ [2024-11-07 16:06:24,929][14395] Avg episode reward: [(0, '4.330')]
+ [2024-11-07 16:06:28,033][14461] Updated weights for policy 0, policy_version 4830 (0.0030)
+ [2024-11-07 16:06:29,927][14395] Fps is (10 sec: 8192.1, 60 sec: 7236.3, 300 sec: 5803.8). Total num frames: 19795968. Throughput: 0: 1945.3. Samples: 945444. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+ [2024-11-07 16:06:29,929][14395] Avg episode reward: [(0, '4.467')]
+ [2024-11-07 16:06:33,086][14461] Updated weights for policy 0, policy_version 4840 (0.0030)
+ [2024-11-07 16:06:34,927][14395] Fps is (10 sec: 8192.0, 60 sec: 7372.8, 300 sec: 5831.6). Total num frames: 19836928. Throughput: 0: 1899.5. Samples: 951484. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:06:34,929][14395] Avg episode reward: [(0, '4.250')]
+ [2024-11-07 16:06:37,870][14461] Updated weights for policy 0, policy_version 4850 (0.0034)
+ [2024-11-07 16:06:39,927][14395] Fps is (10 sec: 8192.1, 60 sec: 7441.1, 300 sec: 5914.9). Total num frames: 19877888. Throughput: 0: 1907.3. Samples: 964104. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 16:06:39,928][14395] Avg episode reward: [(0, '4.500')]
+ [2024-11-07 16:06:43,599][14461] Updated weights for policy 0, policy_version 4860 (0.0025)
+ [2024-11-07 16:06:44,927][14395] Fps is (10 sec: 7782.4, 60 sec: 7685.1, 300 sec: 5928.8). Total num frames: 19914752. Throughput: 0: 1879.1. Samples: 975008. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+ [2024-11-07 16:06:44,930][14395] Avg episode reward: [(0, '4.327')]
+ [2024-11-07 16:06:49,088][14461] Updated weights for policy 0, policy_version 4870 (0.0028)
+ [2024-11-07 16:06:49,927][14395] Fps is (10 sec: 7372.8, 60 sec: 7577.6, 300 sec: 5970.5). Total num frames: 19951616. Throughput: 0: 1880.6. Samples: 980362. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:06:49,929][14395] Avg episode reward: [(0, '4.475')]
+ [2024-11-07 16:06:54,927][14395] Fps is (10 sec: 6144.0, 60 sec: 7372.8, 300 sec: 5928.8). Total num frames: 19976192. Throughput: 0: 1814.1. Samples: 989466. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+ [2024-11-07 16:06:54,930][14395] Avg episode reward: [(0, '4.451')]
+ [2024-11-07 16:06:56,409][14461] Updated weights for policy 0, policy_version 4880 (0.0033)
+ [2024-11-07 16:06:58,350][14445] Stopping Batcher_0...
+ [2024-11-07 16:06:58,350][14395] Component Batcher_0 stopped!
+ [2024-11-07 16:06:58,352][14445] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth...
+ [2024-11-07 16:06:58,355][14445] Loop batcher_evt_loop terminating...
+ [2024-11-07 16:06:58,434][14461] Weights refcount: 2 0
+ [2024-11-07 16:06:58,440][14461] Stopping InferenceWorker_p0-w0...
+ [2024-11-07 16:06:58,441][14461] Loop inference_proc0-0_evt_loop terminating...
+ [2024-11-07 16:06:58,440][14395] Component InferenceWorker_p0-w0 stopped!
+ [2024-11-07 16:06:58,493][14395] Component RolloutWorker_w3 stopped!
+ [2024-11-07 16:06:58,495][14468] Stopping RolloutWorker_w3...
+ [2024-11-07 16:06:58,501][14468] Loop rollout_proc3_evt_loop terminating...
+ [2024-11-07 16:06:58,516][14445] Removing /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004532_18563072.pth
+ [2024-11-07 16:06:58,521][14395] Component RolloutWorker_w4 stopped!
+ [2024-11-07 16:06:58,526][14445] Saving /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth...
+ [2024-11-07 16:06:58,531][14477] Stopping RolloutWorker_w4...
+ [2024-11-07 16:06:58,535][14477] Loop rollout_proc4_evt_loop terminating...
+ [2024-11-07 16:06:58,540][14395] Component RolloutWorker_w1 stopped!
+ [2024-11-07 16:06:58,543][14467] Stopping RolloutWorker_w1...
+ [2024-11-07 16:06:58,554][14467] Loop rollout_proc1_evt_loop terminating...
+ [2024-11-07 16:06:58,556][14395] Component RolloutWorker_w0 stopped!
+ [2024-11-07 16:06:58,556][14462] Stopping RolloutWorker_w0...
+ [2024-11-07 16:06:58,564][14462] Loop rollout_proc0_evt_loop terminating...
+ [2024-11-07 16:06:58,608][14470] Stopping RolloutWorker_w6...
+ [2024-11-07 16:06:58,608][14395] Component RolloutWorker_w6 stopped!
+ [2024-11-07 16:06:58,609][14470] Loop rollout_proc6_evt_loop terminating...
+ [2024-11-07 16:06:58,728][14445] Stopping LearnerWorker_p0...
+ [2024-11-07 16:06:58,728][14445] Loop learner_proc0_evt_loop terminating...
+ [2024-11-07 16:06:58,728][14395] Component LearnerWorker_p0 stopped!
+ [2024-11-07 16:06:58,738][14395] Component RolloutWorker_w7 stopped!
+ [2024-11-07 16:06:58,740][14469] Stopping RolloutWorker_w7...
+ [2024-11-07 16:06:58,825][14469] Loop rollout_proc7_evt_loop terminating...
+ [2024-11-07 16:06:59,006][14395] Component RolloutWorker_w2 stopped!
+ [2024-11-07 16:06:59,004][14466] Stopping RolloutWorker_w2...
+ [2024-11-07 16:06:59,007][14466] Loop rollout_proc2_evt_loop terminating...
+ [2024-11-07 16:06:59,405][14395] Component RolloutWorker_w5 stopped!
+ [2024-11-07 16:06:59,412][14395] Waiting for process learner_proc0 to stop...
+ [2024-11-07 16:06:59,416][14478] Stopping RolloutWorker_w5...
+ [2024-11-07 16:06:59,424][14478] Loop rollout_proc5_evt_loop terminating...
+ [2024-11-07 16:07:02,027][14395] Waiting for process inference_proc0-0 to join...
+ [2024-11-07 16:07:02,029][14395] Waiting for process rollout_proc0 to join...
+ [2024-11-07 16:07:02,030][14395] Waiting for process rollout_proc1 to join...
+ [2024-11-07 16:07:02,031][14395] Waiting for process rollout_proc2 to join...
+ [2024-11-07 16:07:02,033][14395] Waiting for process rollout_proc3 to join...
+ [2024-11-07 16:07:02,035][14395] Waiting for process rollout_proc4 to join...
+ [2024-11-07 16:07:02,038][14395] Waiting for process rollout_proc5 to join...
+ [2024-11-07 16:07:02,040][14395] Waiting for process rollout_proc6 to join...
+ [2024-11-07 16:07:02,043][14395] Waiting for process rollout_proc7 to join...
+ [2024-11-07 16:07:02,046][14395] Batcher 0 profile tree view:
+ batching: 32.3104, releasing_batches: 0.0488
+ [2024-11-07 16:07:02,048][14395] InferenceWorker_p0-w0 profile tree view:
+ wait_policy: 0.0004
+ wait_policy_total: 9.8218
+ update_model: 10.4168
+ weight_update: 0.0027
+ one_step: 0.0077
+ handle_policy_step: 648.0918
+ deserialize: 19.6855, stack: 3.1513, obs_to_device_normalize: 190.8703, forward: 275.0630, send_messages: 49.3024
+ prepare_outputs: 88.3335
+ to_cpu: 66.1440
+ [2024-11-07 16:07:02,050][14395] Learner 0 profile tree view:
+ misc: 0.0066, prepare_batch: 31.8707
+ train: 129.8584
+ epoch_init: 0.0102, minibatch_init: 0.0110, losses_postprocess: 3.2725, kl_divergence: 1.3944, after_optimizer: 4.4368
+ calculate_losses: 43.1480
+ losses_init: 0.0066, forward_head: 4.2252, bptt_initial: 29.1641, tail: 1.5559, advantages_returns: 0.3835, losses: 3.9513
+ bptt: 3.5262
+ bptt_forward_core: 3.3479
+ update: 76.8025
+ clip: 1.5711
+ [2024-11-07 16:07:02,052][14395] RolloutWorker_w0 profile tree view:
+ wait_for_trajectories: 0.2704, enqueue_policy_requests: 17.1022, env_step: 199.2890, overhead: 15.8391, complete_rollouts: 0.7729
+ save_policy_outputs: 23.6707
+ split_output_tensors: 8.6823
+ [2024-11-07 16:07:02,054][14395] RolloutWorker_w7 profile tree view:
+ wait_for_trajectories: 0.2330, enqueue_policy_requests: 15.3368, env_step: 303.0272, overhead: 17.1887, complete_rollouts: 0.5677
+ save_policy_outputs: 18.8034
+ split_output_tensors: 6.3821
+ [2024-11-07 16:07:02,056][14395] Loop Runner_EvtLoop terminating...
+ [2024-11-07 16:07:02,058][14395] Runner profile tree view:
+ main_loop: 717.3057
+ [2024-11-07 16:07:02,062][14395] Collected {0: 20004864}, FPS: 5561.8
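(The final summary reports wall time, main_loop: 717.3057 s, and an FPS figure separately from the cumulative frame counter, because this run resumed from an earlier checkpoint. A quick sanity check of the arithmetic, assuming the reported FPS covers only frames collected since the resume:

main_loop_seconds = 717.3057
reported_fps = 5561.8
total_frames = 20_004_864

# Frames gathered in this session, as implied by the reported FPS:
session_frames = reported_fps * main_loop_seconds   # ~3.99M frames
# Frames already on the counter before the resume:
resume_frames = total_frames - session_frames       # ~16.0M frames
print(f"{session_frames:,.0f} collected now, ~{resume_frames:,.0f} before resume")
)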
+ [2024-11-07 16:07:04,057][14395] Loading existing experiment configuration from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json
+ [2024-11-07 16:07:04,058][14395] Overriding arg 'num_workers' with value 1 passed from command line
+ [2024-11-07 16:07:04,060][14395] Adding new argument 'no_render'=True that is not in the saved config file!
+ [2024-11-07 16:07:04,063][14395] Adding new argument 'save_video'=True that is not in the saved config file!
+ [2024-11-07 16:07:04,064][14395] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+ [2024-11-07 16:07:04,066][14395] Adding new argument 'video_name'=None that is not in the saved config file!
+ [2024-11-07 16:07:04,068][14395] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+ [2024-11-07 16:07:04,069][14395] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+ [2024-11-07 16:07:04,071][14395] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+ [2024-11-07 16:07:04,073][14395] Adding new argument 'hf_repository'=None that is not in the saved config file!
+ [2024-11-07 16:07:04,076][14395] Adding new argument 'policy_index'=0 that is not in the saved config file!
+ [2024-11-07 16:07:04,078][14395] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+ [2024-11-07 16:07:04,079][14395] Adding new argument 'train_script'=None that is not in the saved config file!
+ [2024-11-07 16:07:04,080][14395] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+ [2024-11-07 16:07:04,081][14395] Using frameskip 1 and render_action_repeat=4 for evaluation
+ [2024-11-07 16:07:04,121][14395] Doom resolution: 160x120, resize resolution: (128, 72)
+ [2024-11-07 16:07:04,125][14395] RunningMeanStd input shape: (3, 72, 128)
+ [2024-11-07 16:07:04,128][14395] RunningMeanStd input shape: (1,)
+ [2024-11-07 16:07:04,150][14395] ConvEncoder: input_channels=3
+ [2024-11-07 16:07:04,283][14395] Conv encoder output size: 512
+ [2024-11-07 16:07:04,285][14395] Policy head output size: 512
+ [2024-11-07 16:07:05,321][14395] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth...
+ [2024-11-07 16:07:06,188][14395] Num frames 100...
+ [2024-11-07 16:07:06,422][14395] Num frames 200...
+ [2024-11-07 16:07:06,617][14395] Num frames 300...
+ [2024-11-07 16:07:06,844][14395] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+ [2024-11-07 16:07:06,845][14395] Avg episode reward: 3.840, avg true_objective: 3.840
+ [2024-11-07 16:07:06,884][14395] Num frames 400...
+ [2024-11-07 16:07:07,096][14395] Num frames 500...
+ [2024-11-07 16:07:07,280][14395] Num frames 600...
+ [2024-11-07 16:07:07,482][14395] Num frames 700...
+ [2024-11-07 16:07:07,675][14395] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+ [2024-11-07 16:07:07,677][14395] Avg episode reward: 3.840, avg true_objective: 3.840
+ [2024-11-07 16:07:07,748][14395] Num frames 800...
+ [2024-11-07 16:07:07,932][14395] Num frames 900...
+ [2024-11-07 16:07:08,126][14395] Num frames 1000...
+ [2024-11-07 16:07:08,328][14395] Num frames 1100...
+ [2024-11-07 16:07:08,555][14395] Avg episode rewards: #0: 4.280, true rewards: #0: 3.947
+ [2024-11-07 16:07:08,556][14395] Avg episode reward: 4.280, avg true_objective: 3.947
+ [2024-11-07 16:07:08,592][14395] Num frames 1200...
+ [2024-11-07 16:07:08,781][14395] Num frames 1300...
+ [2024-11-07 16:07:08,969][14395] Num frames 1400...
+ [2024-11-07 16:07:09,158][14395] Num frames 1500...
+ [2024-11-07 16:07:09,362][14395] Avg episode rewards: #0: 4.170, true rewards: #0: 3.920
+ [2024-11-07 16:07:09,364][14395] Avg episode reward: 4.170, avg true_objective: 3.920
+ [2024-11-07 16:07:09,437][14395] Num frames 1600...
+ [2024-11-07 16:07:09,623][14395] Num frames 1700...
+ [2024-11-07 16:07:09,824][14395] Num frames 1800...
+ [2024-11-07 16:07:10,015][14395] Num frames 1900...
+ [2024-11-07 16:07:10,204][14395] Num frames 2000...
+ [2024-11-07 16:07:10,297][14395] Avg episode rewards: #0: 4.432, true rewards: #0: 4.032
+ [2024-11-07 16:07:10,298][14395] Avg episode reward: 4.432, avg true_objective: 4.032
+ [2024-11-07 16:07:10,613][14395] Num frames 2100...
+ [2024-11-07 16:07:10,776][14395] Num frames 2200...
+ [2024-11-07 16:07:10,931][14395] Num frames 2300...
+ [2024-11-07 16:07:11,083][14395] Num frames 2400...
+ [2024-11-07 16:07:11,136][14395] Avg episode rewards: #0: 4.333, true rewards: #0: 4.000
+ [2024-11-07 16:07:11,141][14395] Avg episode reward: 4.333, avg true_objective: 4.000
+ [2024-11-07 16:07:11,310][14395] Num frames 2500...
+ [2024-11-07 16:07:11,467][14395] Num frames 2600...
+ [2024-11-07 16:07:11,629][14395] Num frames 2700...
+ [2024-11-07 16:07:11,821][14395] Avg episode rewards: #0: 4.263, true rewards: #0: 3.977
+ [2024-11-07 16:07:11,823][14395] Avg episode reward: 4.263, avg true_objective: 3.977
+ [2024-11-07 16:07:11,861][14395] Num frames 2800...
+ [2024-11-07 16:07:12,057][14395] Num frames 2900...
+ [2024-11-07 16:07:12,242][14395] Num frames 3000...
+ [2024-11-07 16:07:12,403][14395] Num frames 3100...
+ [2024-11-07 16:07:12,565][14395] Avg episode rewards: #0: 4.210, true rewards: #0: 3.960
+ [2024-11-07 16:07:12,568][14395] Avg episode reward: 4.210, avg true_objective: 3.960
+ [2024-11-07 16:07:12,641][14395] Num frames 3200...
+ [2024-11-07 16:07:12,801][14395] Num frames 3300...
+ [2024-11-07 16:07:12,954][14395] Num frames 3400...
+ [2024-11-07 16:07:13,121][14395] Num frames 3500...
+ [2024-11-07 16:07:13,265][14395] Avg episode rewards: #0: 4.169, true rewards: #0: 3.947
+ [2024-11-07 16:07:13,266][14395] Avg episode reward: 4.169, avg true_objective: 3.947
+ [2024-11-07 16:07:13,351][14395] Num frames 3600...
+ [2024-11-07 16:07:13,513][14395] Num frames 3700...
+ [2024-11-07 16:07:13,670][14395] Num frames 3800...
+ [2024-11-07 16:07:13,825][14395] Num frames 3900...
+ [2024-11-07 16:07:13,938][14395] Avg episode rewards: #0: 4.136, true rewards: #0: 3.936
+ [2024-11-07 16:07:13,942][14395] Avg episode reward: 4.136, avg true_objective: 3.936
+ [2024-11-07 16:07:21,728][14395] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
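(Each "Avg episode rewards" line in the evaluation above is a running mean over the episodes finished so far, so the individual episode rewards can be recovered from consecutive averages via r_n = n*avg_n - (n-1)*avg_{n-1}. A small Python sketch using the true-reward averages printed by this run; the values are copied from the log, and small wobbles come from rounding in the printed averages:

running_avg = [3.840, 3.840, 3.947, 3.920, 4.032,
               4.000, 3.977, 3.960, 3.947, 3.936]
episode_rewards = [
    n * avg - (n - 1) * prev
    for n, (prev, avg) in enumerate(zip([0.0] + running_avg, running_avg), start=1)
]
print([round(r, 2) for r in episode_rewards])
# Episode 3, for example: 3*3.947 - 2*3.840 = 4.16
)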
+ [2024-11-07 16:07:24,406][14395] Loading existing experiment configuration from /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/config.json
+ [2024-11-07 16:07:24,407][14395] Overriding arg 'num_workers' with value 1 passed from command line
+ [2024-11-07 16:07:24,408][14395] Adding new argument 'no_render'=True that is not in the saved config file!
+ [2024-11-07 16:07:24,410][14395] Adding new argument 'save_video'=True that is not in the saved config file!
+ [2024-11-07 16:07:24,410][14395] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+ [2024-11-07 16:07:24,412][14395] Adding new argument 'video_name'=None that is not in the saved config file!
+ [2024-11-07 16:07:24,414][14395] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+ [2024-11-07 16:07:24,415][14395] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+ [2024-11-07 16:07:24,417][14395] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+ [2024-11-07 16:07:24,419][14395] Adding new argument 'hf_repository'='alidenewade/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+ [2024-11-07 16:07:24,420][14395] Adding new argument 'policy_index'=0 that is not in the saved config file!
+ [2024-11-07 16:07:24,422][14395] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+ [2024-11-07 16:07:24,423][14395] Adding new argument 'train_script'=None that is not in the saved config file!
+ [2024-11-07 16:07:24,424][14395] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+ [2024-11-07 16:07:24,426][14395] Using frameskip 1 and render_action_repeat=4 for evaluation
+ [2024-11-07 16:07:24,453][14395] RunningMeanStd input shape: (3, 72, 128)
+ [2024-11-07 16:07:24,455][14395] RunningMeanStd input shape: (1,)
+ [2024-11-07 16:07:24,467][14395] ConvEncoder: input_channels=3
+ [2024-11-07 16:07:24,506][14395] Conv encoder output size: 512
+ [2024-11-07 16:07:24,508][14395] Policy head output size: 512
+ [2024-11-07 16:07:24,534][14395] Loading state from checkpoint /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth...
+ [2024-11-07 16:07:25,029][14395] Num frames 100...
+ [2024-11-07 16:07:25,225][14395] Num frames 200...
+ [2024-11-07 16:07:25,408][14395] Num frames 300...
+ [2024-11-07 16:07:25,589][14395] Num frames 400...
+ [2024-11-07 16:07:25,732][14395] Avg episode rewards: #0: 5.480, true rewards: #0: 4.480
+ [2024-11-07 16:07:25,735][14395] Avg episode reward: 5.480, avg true_objective: 4.480
+ [2024-11-07 16:07:25,849][14395] Num frames 500...
+ [2024-11-07 16:07:26,042][14395] Num frames 600...
+ [2024-11-07 16:07:26,234][14395] Num frames 700...
+ [2024-11-07 16:07:28,651][14395] Num frames 800...
+ [2024-11-07 16:07:28,777][14395] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
+ [2024-11-07 16:07:28,780][14395] Avg episode reward: 4.660, avg true_objective: 4.160
+ [2024-11-07 16:07:28,970][14395] Num frames 900...
+ [2024-11-07 16:07:29,166][14395] Num frames 1000...
+ [2024-11-07 16:07:29,376][14395] Num frames 1100...
+ [2024-11-07 16:07:29,567][14395] Num frames 1200...
+ [2024-11-07 16:07:29,644][14395] Avg episode rewards: #0: 4.367, true rewards: #0: 4.033
+ [2024-11-07 16:07:29,645][14395] Avg episode reward: 4.367, avg true_objective: 4.033
+ [2024-11-07 16:07:29,827][14395] Num frames 1300...
+ [2024-11-07 16:07:30,024][14395] Num frames 1400...
+ [2024-11-07 16:07:30,223][14395] Num frames 1500...
+ [2024-11-07 16:07:30,454][14395] Avg episode rewards: #0: 4.235, true rewards: #0: 3.985
+ [2024-11-07 16:07:30,458][14395] Avg episode reward: 4.235, avg true_objective: 3.985
+ [2024-11-07 16:07:30,487][14395] Num frames 1600...
+ [2024-11-07 16:07:30,674][14395] Num frames 1700...
+ [2024-11-07 16:07:30,864][14395] Num frames 1800...
+ [2024-11-07 16:07:31,053][14395] Num frames 1900...
+ [2024-11-07 16:07:31,263][14395] Avg episode rewards: #0: 4.156, true rewards: #0: 3.956
+ [2024-11-07 16:07:31,266][14395] Avg episode reward: 4.156, avg true_objective: 3.956
+ [2024-11-07 16:07:31,328][14395] Num frames 2000...
+ [2024-11-07 16:07:31,507][14395] Num frames 2100...
+ [2024-11-07 16:07:31,687][14395] Num frames 2200...
+ [2024-11-07 16:07:31,875][14395] Num frames 2300...
+ [2024-11-07 16:07:32,054][14395] Num frames 2400...
+ [2024-11-07 16:07:32,158][14395] Avg episode rewards: #0: 4.377, true rewards: #0: 4.043
+ [2024-11-07 16:07:32,160][14395] Avg episode reward: 4.377, avg true_objective: 4.043
+ [2024-11-07 16:07:32,315][14395] Num frames 2500...
+ [2024-11-07 16:07:32,504][14395] Num frames 2600...
+ [2024-11-07 16:07:32,692][14395] Num frames 2700...
+ [2024-11-07 16:07:32,883][14395] Num frames 2800...
+ [2024-11-07 16:07:32,959][14395] Avg episode rewards: #0: 4.300, true rewards: #0: 4.014
+ [2024-11-07 16:07:32,961][14395] Avg episode reward: 4.300, avg true_objective: 4.014
+ [2024-11-07 16:07:33,137][14395] Num frames 2900...
+ [2024-11-07 16:07:33,331][14395] Num frames 3000...
+ [2024-11-07 16:07:33,528][14395] Num frames 3100...
+ [2024-11-07 16:07:33,770][14395] Avg episode rewards: #0: 4.243, true rewards: #0: 3.992
+ [2024-11-07 16:07:33,771][14395] Avg episode reward: 4.243, avg true_objective: 3.992
+ [2024-11-07 16:07:33,782][14395] Num frames 3200...
+ [2024-11-07 16:07:33,993][14395] Num frames 3300...
+ [2024-11-07 16:07:34,194][14395] Num frames 3400...
+ [2024-11-07 16:07:34,400][14395] Num frames 3500...
+ [2024-11-07 16:07:34,611][14395] Avg episode rewards: #0: 4.198, true rewards: #0: 3.976
+ [2024-11-07 16:07:34,612][14395] Avg episode reward: 4.198, avg true_objective: 3.976
+ [2024-11-07 16:07:34,657][14395] Num frames 3600...
+ [2024-11-07 16:07:34,862][14395] Num frames 3700...
+ [2024-11-07 16:07:35,065][14395] Num frames 3800...
+ [2024-11-07 16:07:35,201][14395] Avg episode rewards: #0: 4.034, true rewards: #0: 3.834
+ [2024-11-07 16:07:35,203][14395] Avg episode reward: 4.034, avg true_objective: 3.834
+ [2024-11-07 16:07:42,638][14395] Replay video saved to /root/hfRL/ml/LunarLander-v2/train_dir/default_experiment/replay.mp4!
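(Ten-episode evaluations like the one above are usually condensed to a single "mean +/- std" figure when reported. A minimal Python sketch, assuming per-episode true rewards reconstructed from this run's running averages with the formula from the earlier snippet, so the values below are approximate reconstructions, not numbers logged directly:

import statistics

def summarize(rewards):
    mean = statistics.mean(rewards)
    std = statistics.pstdev(rewards)  # population std over the evaluated episodes
    return f"{mean:.2f} +/- {std:.2f}"

# Reconstructed (approximately) from this run's true-reward running averages:
print(summarize([4.48, 3.84, 3.78, 3.84, 3.84, 4.48, 3.84, 3.84, 3.85, 2.56]))
)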